Skip to content

Commit 7f8bf2e

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into trt_yolobox
2 parents 98e4237 + 51eb29d commit 7f8bf2e

File tree

15 files changed

+996
-7
lines changed

15 files changed

+996
-7
lines changed

paddle/fluid/extension/include/ext_tensor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ class PD_DLL_DECL Tensor {
5252
/// \brief Construct a Tensor on target Place for CustomOp.
5353
/// Generally it's only used for user to create Tensor.
5454
explicit Tensor(const PlaceType& place);
55+
/// \brief Construct a Tensor on target Place with shape for CustomOp.
56+
/// Generally it's only used for user to create Tensor.
57+
Tensor(const PlaceType& place, const std::vector<int64_t>& shape);
5558
/// \brief Reset the shape of the tensor.
5659
/// Generally it's only used for the input tensor.
5760
/// Reshape must be called before calling

paddle/fluid/extension/src/ext_tensor.cc

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,32 @@ void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
102102

103103
void Tensor::reshape(const std::vector<int64_t> &shape) {
104104
GET_CASTED_TENSOR
105-
tensor->Resize(framework::make_ddim(shape));
105+
auto new_dim = framework::make_ddim(shape);
106+
if (tensor->numel() != framework::product(new_dim)) {
107+
LOG(WARNING) << "Custom Op: Calling reshape to a new shape which is bigger "
108+
"or smaller"
109+
<< "than original shape will not change your tensor's memory "
110+
"Please call"
111+
<< "paddle::Tensor::mutable_data<T>() after to reallocate "
112+
"your tensor's size."
113+
<< std::endl;
114+
}
115+
tensor->Resize(new_dim);
106116
}
107117

108118
/// \brief Construct an empty Tensor on `place`.
/// No storage is allocated here; memory is obtained later via
/// mutable_data<T>() (which also records the place).
Tensor::Tensor(const PlaceType &place)
    : tensor_(std::make_shared<framework::LoDTensor>()),
      place_(place),
      stream_(StreamWrapper()) {}
122+
123+
/// \brief Construct a Tensor on `place` with the given `shape`.
/// Like reshape(), Resize() here only sets the dims metadata; no memory is
/// allocated until mutable_data<T>() is called.
Tensor::Tensor(const PlaceType &place, const std::vector<int64_t> &shape)
    : tensor_(std::make_shared<framework::LoDTensor>()),
      place_(place),
      stream_(StreamWrapper()) {
  GET_CASTED_TENSOR
  tensor->Resize(framework::make_ddim(shape));
}
130+
112131
template <typename T>
113132
T *Tensor::mutable_data(const PlaceType &place) {
114133
place_ = place;

paddle/fluid/framework/custom_tensor_utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class CustomTensorUtils {
3737
/// \brief Share data FROM another tensor.
3838
/// Use this to pass tensor from op to op
3939
/// \return void.
40-
static void ShareDataFrom(const void* src, const Tensor& dst);
40+
static void ShareDataFrom(const void* src, const paddle::Tensor& dst);
4141

4242
static framework::proto::VarType::Type ConvertEnumDTypeToInnerDType(
4343
const paddle::DataType& dtype) {

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,8 @@ USE_TRT_CONVERTER(stack);
11931193
USE_TRT_CONVERTER(clip);
11941194
USE_TRT_CONVERTER(gather);
11951195
USE_TRT_CONVERTER(yolo_box);
1196+
USE_TRT_CONVERTER(roi_align);
1197+
USE_TRT_CONVERTER(affine_channel);
11961198
USE_TRT_CONVERTER(multiclass_nms);
11971199
USE_TRT_CONVERTER(nearest_interp);
11981200
#endif

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ nv_library(tensorrt_converter
77
emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
88
gather_op.cc
99
yolo_box_op.cc
10+
roi_align_op.cc
11+
affine_channel_op.cc
1012
multiclass_nms_op.cc
1113
nearest_interp_op.cc
1214
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/framework/data_layout.h"
16+
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
17+
18+
namespace paddle {
19+
namespace framework {
20+
class Scope;
21+
22+
namespace proto {
23+
class OpDesc;
24+
} // namespace proto
25+
} // namespace framework
26+
} // namespace paddle
27+
28+
namespace paddle {
29+
namespace inference {
30+
namespace tensorrt {
31+
32+
/*
 * Affine Channel Op converter: lowers the fluid `affine_channel` op
 * (a per-channel `out = scale * x + bias`) to a TensorRT ScaleNd layer
 * running in kCHANNEL mode. NCHW layout only (enforced below).
 */
class AffineChannelOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    VLOG(3) << "convert a fluid affine_channel op to tensorrt scale nd layer";

    framework::OpDesc op_desc(op, nullptr);
    std::string input_name = op_desc.Input("X").front();
    std::string scale_name = op_desc.Input("Scale").front();
    std::string bias_name = op_desc.Input("Bias").front();
    std::string output_name = op_desc.Output("Out").front();

    auto input_tensor = engine_->GetITensor(input_name);
    auto idim = input_tensor->getDimensions();

    // Pull the per-channel Scale/Bias parameter tensors out of the scope as
    // host (CPU) float data so they can be baked into the TRT layer weights.
    auto* scale_v = scope.FindVar(scale_name);
    auto* scale_t = scale_v->GetMutable<framework::LoDTensor>();
    float* scale_ptr = engine_->GetWeightCPUData(scale_name, scale_t, false);

    auto* bias_v = scope.FindVar(bias_name);
    auto* bias_t = bias_v->GetMutable<framework::LoDTensor>();
    float* bias_ptr = engine_->GetWeightCPUData(bias_name, bias_t, false);

    auto data_layout = framework::StringToDataLayout(
        BOOST_GET_CONST(std::string, op_desc.GetAttr("data_layout")));

    PADDLE_ENFORCE_EQ(
        data_layout, framework::DataLayout::kNCHW,
        platform::errors::InvalidArgument(
            "TensorRT affine channel converter can only convert NCHW format. "
            "Other format should be run in fluid mode. Report a bug on github "
            "issue if you see this line."));

    // tensorrt scalend layer only support spatial dims >= 2,
    // so nhwc is not available (spatial dims == 0).
    // with_dynamic_shape() is a bool used as 0/1: channel axis 0 for static
    // shape, 1 for dynamic shape — presumably because dynamic-shape dims
    // include the batch dimension (NOTE(review): confirm against engine).
    const int channel_axis = engine_->with_dynamic_shape();

    // Weight counts are taken from the input's channel extent; power weights
    // are empty so the scale layer computes scale * x + bias only.
    TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
                                         static_cast<void*>(scale_ptr),
                                         (size_t)idim.d[channel_axis]};
    TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
                                        static_cast<void*>(bias_ptr),
                                        (size_t)idim.d[channel_axis]};
    TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
                                         0};

    auto layer = TRT_ENGINE_ADD_LAYER(engine_, ScaleNd, *input_tensor,
                                      nvinfer1::ScaleMode::kCHANNEL,
                                      bias_weights.get(), scale_weights.get(),
                                      power_weights.get(), channel_axis);

    // Register the layer's output under the fluid op's output name and, in
    // test mode, mark it as a network output.
    RreplenishLayerAndOutput(layer, "affine_channel", {output_name}, test_mode);
  }
};
89+
90+
} // namespace tensorrt
91+
} // namespace inference
92+
} // namespace paddle
93+
94+
REGISTER_TRT_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
16+
#include "paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h"
17+
18+
namespace paddle {
19+
namespace framework {
20+
class Scope;
21+
22+
namespace proto {
23+
class OpDesc;
24+
} // namespace proto
25+
} // namespace framework
26+
} // namespace paddle
27+
28+
namespace paddle {
29+
namespace inference {
30+
namespace tensorrt {
31+
32+
/*
 * Roi Align Op converter: lowers the fluid `roi_align` op to the
 * RoiAlignPluginDynamic TensorRT plugin. Dynamic-shape mode only
 * (enforced below).
 */
class RoiAlignOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    VLOG(3) << "convert a fluid roi align op to tensorrt plugin";

    framework::OpDesc op_desc(op, nullptr);
    std::string input_name = op_desc.Input("X").front();
    std::string rois_name = op_desc.Input("ROIs").front();
    std::string output_name = op_desc.Output("Out").front();

    // Pooling attributes forwarded verbatim to the plugin.
    const auto pooled_height =
        BOOST_GET_CONST(int, op_desc.GetAttr("pooled_height"));
    const auto pooled_width =
        BOOST_GET_CONST(int, op_desc.GetAttr("pooled_width"));
    const auto spatial_scale =
        BOOST_GET_CONST(float, op_desc.GetAttr("spatial_scale"));
    const auto sampling_ratio =
        BOOST_GET_CONST(int, op_desc.GetAttr("sampling_ratio"));

    const auto input_tensor = engine_->GetITensor(input_name);
    const auto rois_tensor = engine_->GetITensor(rois_name);

    // Run the plugin in half precision when the engine was built with FP16.
    const nvinfer1::DataType data_type_ = engine_->WithFp16()
                                              ? nvinfer1::DataType::kHALF
                                              : nvinfer1::DataType::kFLOAT;

    std::vector<nvinfer1::ITensor*> inputs{input_tensor, rois_tensor};
    nvinfer1::ILayer* layer = nullptr;

    // roi_align changes the effective batch size (one output per ROI), so
    // only the dynamic-shape path can represent its output dims.
    PADDLE_ENFORCE_EQ(
        engine_->with_dynamic_shape(), true,
        platform::errors::InvalidArgument(
            "TRT roi align plugin only accept the dynamic shape, because that "
            "the roi_align will change the batch size."));

    // Ownership of the raw plugin pointer passes to the TRT network via
    // addPluginV2 — NOTE(review): confirm the engine/network frees it.
    auto* roi_align_plugin = new plugin::RoiAlignPluginDynamic(
        data_type_, pooled_height, pooled_width, spatial_scale, sampling_ratio);
    auto roi_align_layer = engine_->network()->addPluginV2(
        inputs.data(), inputs.size(), *roi_align_plugin);
    layer = roi_align_layer;

    // Register the layer's output under the fluid op's output name and, in
    // test mode, mark it as a network output.
    std::vector<std::string> output_names{output_name};
    RreplenishLayerAndOutput(layer, "roi_align", output_names, test_mode);
  }
};
81+
82+
} // namespace tensorrt
83+
} // namespace inference
84+
} // namespace paddle
85+
86+
REGISTER_TRT_OP_CONVERTER(roi_align, RoiAlignOpConverter);

paddle/fluid/inference/tensorrt/op_teller.cc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ struct SimpleOpTypeSetTeller : public Teller {
112112
"flatten",
113113
"gather",
114114
"yolo_box",
115+
"roi_align",
116+
"affine_channel",
115117
"multiclass_nms",
116118
"nearest_interp",
117119
};
@@ -206,6 +208,13 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
206208
return has_attrs;
207209
}
208210

211+
if (op_type == "affine_channel") {
212+
if (!desc.HasAttr("data_layout")) return false;
213+
auto data_layout = framework::StringToDataLayout(
214+
BOOST_GET_CONST(std::string, desc.GetAttr("data_layout")));
215+
if (data_layout != framework::DataLayout::kNCHW) return false;
216+
}
217+
209218
if (op_type == "multiclass_nms") {
210219
if (with_dynamic_shape) return false;
211220
auto* block = desc.Block();
@@ -248,6 +257,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
248257
return false;
249258
}
250259
}
260+
251261
if (op_type == "nearest_interp") {
252262
std::vector<std::string> attrs{"data_layout", "interp_method",
253263
"align_corners", "scale",
@@ -265,6 +275,28 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
265275
if (interp_method != "nearest") return false;
266276
}
267277

278+
if (op_type == "roi_align") {
279+
if (!with_dynamic_shape) return false;
280+
281+
std::vector<std::string> attrs{"pooled_height", "pooled_width",
282+
"spatial_scale", "sampling_ratio"};
283+
for (auto const attr : attrs) {
284+
if (!desc.HasAttr(attr)) return false;
285+
}
286+
287+
const auto pooled_height =
288+
BOOST_GET_CONST(int, desc.GetAttr("pooled_height"));
289+
if (pooled_height <= 0) return false;
290+
291+
const auto pooled_width =
292+
BOOST_GET_CONST(int, desc.GetAttr("pooled_width"));
293+
if (pooled_width <= 0) return false;
294+
295+
const auto spatial_scale =
296+
BOOST_GET_CONST(float, desc.GetAttr("spatial_scale"));
297+
if (spatial_scale <= 0.f) return false;
298+
}
299+
268300
if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
269301
}
270302
return false;

paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ nv_library(tensorrt_plugin
66
qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu
77
hard_swish_op_plugin.cu stack_op_plugin.cu special_slice_plugin.cu
88
yolo_box_op_plugin.cu
9-
9+
roi_align_op_plugin.cu
1010
DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
1111

1212
nv_test(test_split_plugin SRCS test_split_plugin.cc DEPS

0 commit comments

Comments
 (0)