Skip to content

Commit 7f8bf2e

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into trt_yolobox
2 parents 98e4237 + 51eb29d commit 7f8bf2e

File tree

15 files changed

+996
-7
lines changed

15 files changed

+996
-7
lines changed

paddle/fluid/extension/include/ext_tensor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ class PD_DLL_DECL Tensor {
5252
/// \brief Construct a Tensor on target Place for CustomOp.
5353
/// Generally it's only used for user to create Tensor.
5454
explicit Tensor(const PlaceType& place);
55+
/// \brief Construct a Tensor on target Place with shape for CustomOp.
56+
/// Generally it's only used for user to create Tensor.
57+
Tensor(const PlaceType& place, const std::vector<int64_t>& shape);
5558
/// \brief Reset the shape of the tensor.
5659
/// Generally it's only used for the input tensor.
5760
/// Reshape must be called before calling

paddle/fluid/extension/src/ext_tensor.cc

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,32 @@ void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
102102

103103
void Tensor::reshape(const std::vector<int64_t> &shape) {
104104
GET_CASTED_TENSOR
105-
tensor->Resize(framework::make_ddim(shape));
105+
auto new_dim = framework::make_ddim(shape);
106+
if (tensor->numel() != framework::product(new_dim)) {
107+
LOG(WARNING) << "Custom Op: Calling reshape to a new shape which is bigger "
108+
"or smaller"
109+
<< "than original shape will not change your tensor's memory "
110+
"Please call"
111+
<< "paddle::Tensor::mutable_data<T>() after to reallocate "
112+
"your tensor's size."
113+
<< std::endl;
114+
}
115+
tensor->Resize(new_dim);
106116
}
107117

108118
/// \brief Construct an empty Tensor on `place`.
/// No storage is allocated here; memory is obtained later via
/// mutable_data<T>() (which also records the place).
Tensor::Tensor(const PlaceType &place)
    : tensor_(std::make_shared<framework::LoDTensor>()),
      place_(place),
      stream_(StreamWrapper()) {}
122+
123+
/// \brief Construct a Tensor on `place` with the given `shape`.
/// Like reshape(), Resize() here only sets the dims metadata; no memory is
/// allocated until mutable_data<T>() is called.
Tensor::Tensor(const PlaceType &place, const std::vector<int64_t> &shape)
    : tensor_(std::make_shared<framework::LoDTensor>()),
      place_(place),
      stream_(StreamWrapper()) {
  GET_CASTED_TENSOR
  tensor->Resize(framework::make_ddim(shape));
}
130+
112131
template <typename T>
113132
T *Tensor::mutable_data(const PlaceType &place) {
114133
place_ = place;

paddle/fluid/framework/custom_tensor_utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class CustomTensorUtils {
3737
/// \brief Share data FROM another tensor.
3838
/// Use this to pass tensor from op to op
3939
/// \return void.
40-
static void ShareDataFrom(const void* src, const Tensor& dst);
40+
static void ShareDataFrom(const void* src, const paddle::Tensor& dst);
4141

4242
static framework::proto::VarType::Type ConvertEnumDTypeToInnerDType(
4343
const paddle::DataType& dtype) {

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,8 @@ USE_TRT_CONVERTER(stack);
11931193
USE_TRT_CONVERTER(clip);
11941194
USE_TRT_CONVERTER(gather);
11951195
USE_TRT_CONVERTER(yolo_box);
1196+
USE_TRT_CONVERTER(roi_align);
1197+
USE_TRT_CONVERTER(affine_channel);
11961198
USE_TRT_CONVERTER(multiclass_nms);
11971199
USE_TRT_CONVERTER(nearest_interp);
11981200
#endif

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ nv_library(tensorrt_converter
77
emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
88
gather_op.cc
99
yolo_box_op.cc
10+
roi_align_op.cc
11+
affine_channel_op.cc
1012
multiclass_nms_op.cc
1113
nearest_interp_op.cc
1214
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/framework/data_layout.h"
16+
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
17+
18+
namespace paddle {
19+
namespace framework {
20+
class Scope;
21+
22+
namespace proto {
23+
class OpDesc;
24+
} // namespace proto
25+
} // namespace framework
26+
} // namespace paddle
27+
28+
namespace paddle {
29+
namespace inference {
30+
namespace tensorrt {
31+
32+
/*
 * Affine Channel Op converter: lowers the fluid `affine_channel` op
 * (a per-channel `out = scale * x + bias`) to a TensorRT ScaleNd layer
 * running in kCHANNEL mode. NCHW layout only (enforced below).
 */
class AffineChannelOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    VLOG(3) << "convert a fluid affine_channel op to tensorrt scale nd layer";

    framework::OpDesc op_desc(op, nullptr);
    std::string input_name = op_desc.Input("X").front();
    std::string scale_name = op_desc.Input("Scale").front();
    std::string bias_name = op_desc.Input("Bias").front();
    std::string output_name = op_desc.Output("Out").front();

    auto input_tensor = engine_->GetITensor(input_name);
    auto idim = input_tensor->getDimensions();

    // Pull the per-channel Scale/Bias parameter tensors out of the scope as
    // host (CPU) float data so they can be baked into the TRT layer weights.
    auto* scale_v = scope.FindVar(scale_name);
    auto* scale_t = scale_v->GetMutable<framework::LoDTensor>();
    float* scale_ptr = engine_->GetWeightCPUData(scale_name, scale_t, false);

    auto* bias_v = scope.FindVar(bias_name);
    auto* bias_t = bias_v->GetMutable<framework::LoDTensor>();
    float* bias_ptr = engine_->GetWeightCPUData(bias_name, bias_t, false);

    auto data_layout = framework::StringToDataLayout(
        BOOST_GET_CONST(std::string, op_desc.GetAttr("data_layout")));

    PADDLE_ENFORCE_EQ(
        data_layout, framework::DataLayout::kNCHW,
        platform::errors::InvalidArgument(
            "TensorRT affine channel converter can only convert NCHW format. "
            "Other format should be run in fluid mode. Report a bug on github "
            "issue if you see this line."));

    // tensorrt scalend layer only support spatial dims >= 2,
    // so nhwc is not available (spatial dims == 0).
    // with_dynamic_shape() is a bool used as 0/1: channel axis 0 for static
    // shape, 1 for dynamic shape — presumably because dynamic-shape dims
    // include the batch dimension (NOTE(review): confirm against engine).
    const int channel_axis = engine_->with_dynamic_shape();

    // Weight counts are taken from the input's channel extent; power weights
    // are empty so the scale layer computes scale * x + bias only.
    TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
                                         static_cast<void*>(scale_ptr),
                                         (size_t)idim.d[channel_axis]};
    TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
                                        static_cast<void*>(bias_ptr),
                                        (size_t)idim.d[channel_axis]};
    TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
                                         0};

    auto layer = TRT_ENGINE_ADD_LAYER(engine_, ScaleNd, *input_tensor,
                                      nvinfer1::ScaleMode::kCHANNEL,
                                      bias_weights.get(), scale_weights.get(),
                                      power_weights.get(), channel_axis);

    // Register the layer's output under the fluid op's output name and, in
    // test mode, mark it as a network output.
    RreplenishLayerAndOutput(layer, "affine_channel", {output_name}, test_mode);
  }
};
89+
90+
} // namespace tensorrt
91+
} // namespace inference
92+
} // namespace paddle
93+
94+
REGISTER_TRT_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
16+
#include "paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h"
17+
18+
namespace paddle {
19+
namespace framework {
20+
class Scope;
21+
22+
namespace proto {
23+
class OpDesc;
24+
} // namespace proto
25+
} // namespace framework
26+
} // namespace paddle
27+
28+
namespace paddle {
29+
namespace inference {
30+
namespace tensorrt {
31+
32+
/*
 * Roi Align Op converter: lowers the fluid `roi_align` op to the
 * RoiAlignPluginDynamic TensorRT plugin. Dynamic-shape mode only
 * (enforced below).
 */
class RoiAlignOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    VLOG(3) << "convert a fluid roi align op to tensorrt plugin";

    framework::OpDesc op_desc(op, nullptr);
    std::string input_name = op_desc.Input("X").front();
    std::string rois_name = op_desc.Input("ROIs").front();
    std::string output_name = op_desc.Output("Out").front();

    // Pooling attributes forwarded verbatim to the plugin.
    const auto pooled_height =
        BOOST_GET_CONST(int, op_desc.GetAttr("pooled_height"));
    const auto pooled_width =
        BOOST_GET_CONST(int, op_desc.GetAttr("pooled_width"));
    const auto spatial_scale =
        BOOST_GET_CONST(float, op_desc.GetAttr("spatial_scale"));
    const auto sampling_ratio =
        BOOST_GET_CONST(int, op_desc.GetAttr("sampling_ratio"));

    const auto input_tensor = engine_->GetITensor(input_name);
    const auto rois_tensor = engine_->GetITensor(rois_name);

    // Run the plugin in half precision when the engine was built with FP16.
    const nvinfer1::DataType data_type_ = engine_->WithFp16()
                                              ? nvinfer1::DataType::kHALF
                                              : nvinfer1::DataType::kFLOAT;

    std::vector<nvinfer1::ITensor*> inputs{input_tensor, rois_tensor};
    nvinfer1::ILayer* layer = nullptr;

    // roi_align changes the effective batch size (one output per ROI), so
    // only the dynamic-shape path can represent its output dims.
    PADDLE_ENFORCE_EQ(
        engine_->with_dynamic_shape(), true,
        platform::errors::InvalidArgument(
            "TRT roi align plugin only accept the dynamic shape, because that "
            "the roi_align will change the batch size."));

    // Ownership of the raw plugin pointer passes to the TRT network via
    // addPluginV2 — NOTE(review): confirm the engine/network frees it.
    auto* roi_align_plugin = new plugin::RoiAlignPluginDynamic(
        data_type_, pooled_height, pooled_width, spatial_scale, sampling_ratio);
    auto roi_align_layer = engine_->network()->addPluginV2(
        inputs.data(), inputs.size(), *roi_align_plugin);
    layer = roi_align_layer;

    // Register the layer's output under the fluid op's output name and, in
    // test mode, mark it as a network output.
    std::vector<std::string> output_names{output_name};
    RreplenishLayerAndOutput(layer, "roi_align", output_names, test_mode);
  }
};
81+
82+
} // namespace tensorrt
83+
} // namespace inference
84+
} // namespace paddle
85+
86+
REGISTER_TRT_OP_CONVERTER(roi_align, RoiAlignOpConverter);

paddle/fluid/inference/tensorrt/op_teller.cc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ struct SimpleOpTypeSetTeller : public Teller {
112112
"flatten",
113113
"gather",
114114
"yolo_box",
115+
"roi_align",
116+
"affine_channel",
115117
"multiclass_nms",
116118
"nearest_interp",
117119
};
@@ -206,6 +208,13 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
206208
return has_attrs;
207209
}
208210

211+
if (op_type == "affine_channel") {
212+
if (!desc.HasAttr("data_layout")) return false;
213+
auto data_layout = framework::StringToDataLayout(
214+
BOOST_GET_CONST(std::string, desc.GetAttr("data_layout")));
215+
if (data_layout != framework::DataLayout::kNCHW) return false;
216+
}
217+
209218
if (op_type == "multiclass_nms") {
210219
if (with_dynamic_shape) return false;
211220
auto* block = desc.Block();
@@ -248,6 +257,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
248257
return false;
249258
}
250259
}
260+
251261
if (op_type == "nearest_interp") {
252262
std::vector<std::string> attrs{"data_layout", "interp_method",
253263
"align_corners", "scale",
@@ -265,6 +275,28 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
265275
if (interp_method != "nearest") return false;
266276
}
267277

278+
if (op_type == "roi_align") {
279+
if (!with_dynamic_shape) return false;
280+
281+
std::vector<std::string> attrs{"pooled_height", "pooled_width",
282+
"spatial_scale", "sampling_ratio"};
283+
for (auto const attr : attrs) {
284+
if (!desc.HasAttr(attr)) return false;
285+
}
286+
287+
const auto pooled_height =
288+
BOOST_GET_CONST(int, desc.GetAttr("pooled_height"));
289+
if (pooled_height <= 0) return false;
290+
291+
const auto pooled_width =
292+
BOOST_GET_CONST(int, desc.GetAttr("pooled_width"));
293+
if (pooled_width <= 0) return false;
294+
295+
const auto spatial_scale =
296+
BOOST_GET_CONST(float, desc.GetAttr("spatial_scale"));
297+
if (spatial_scale <= 0.f) return false;
298+
}
299+
268300
if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
269301
}
270302
return false;

paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ nv_library(tensorrt_plugin
66
qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu
77
hard_swish_op_plugin.cu stack_op_plugin.cu special_slice_plugin.cu
88
yolo_box_op_plugin.cu
9-
9+
roi_align_op_plugin.cu
1010
DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
1111

1212
nv_test(test_split_plugin SRCS test_split_plugin.cc DEPS

0 commit comments

Comments
 (0)