PaddlePaddle · b3602sss · Sep 10, 2021 · Sep 6, 2021 · Sep 6, 2021 · Sep 7, 2021
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1257,6 +1257,8 @@ USE_TRT_CONVERTER(reduce_sum);
 USE_TRT_CONVERTER(gather_nd);
 USE_TRT_CONVERTER(reduce_mean);
 USE_TRT_CONVERTER(tile);
+USE_TRT_CONVERTER(conv3d);
+USE_TRT_CONVERTER(conv3d_transpose);
 #endif
 
 namespace paddle_infer {

diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -16,6 +16,7 @@ nv_library(tensorrt_converter
                 reduce_op.cc
                 gather_nd_op.cc
                 tile_op.cc
+                conv3d_op.cc
            DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
 
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS

diff --git a/paddle/fluid/inference/tensorrt/convert/conv3d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv3d_op.cc
@@ -0,0 +1,172 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+template <typename RegistFunc, typename SetDilationFunc>
+void ConvertConv3d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
+                   const framework::Scope& scope, bool test_mode,
+                   RegistFunc fadd_layer, SetDilationFunc fset_dilation,
+                   const std::string& name) {
+  VLOG(3) << "convert a fluid " << name << " op to tensorrt layer without bias";
+
+  framework::OpDesc op_desc(op, nullptr);
+
+  auto* X = engine->GetITensor(op_desc.Input("Input").front());
+  std::string filter_var_name = op_desc.Input("Filter").front();
+  auto* Y_v = scope.FindVar(filter_var_name);
+  PADDLE_ENFORCE_NOT_NULL(
+      Y_v, platform::errors::NotFound(
+               "Can not find %s presistale var in scope.", filter_var_name));
+  auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
+  float* weight_data = nullptr;
+  bool enable_int8 = op_desc.HasAttr("enable_int8");
+
+  if (enable_int8) {
+#if IS_TRT_VERSION_GE(5000)
+    float in_scale =
+        BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale")) * 127;
+    auto weight_scale =
+        BOOST_GET_CONST(std::vector<float>, op_desc.GetAttr("weight_scale"));
+    weight_data = engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t,
+                                           true, weight_scale);
+    engine->SetTensorDynamicRange(X, in_scale);
+#endif
+  } else {
+    weight_data =
+        engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t, false);
+  }
+
+  PADDLE_ENFORCE_EQ(Y_t->dims().size(), 5UL,
+                    platform::errors::InvalidArgument(
+                        "The conv3d filter's dims size should be 5, but got %d",
+                        Y_t->dims().size()));
+
+  const int n_output = Y_t->dims()[0];
+  const int n_input = Y_t->dims()[1];
+  const int filter_d = Y_t->dims()[2];
+  const int filter_h = Y_t->dims()[3];
+  const int filter_w = Y_t->dims()[4];
+  const int groups = BOOST_GET_CONST(int, op_desc.GetAttr("groups"));
+  const std::vector<int> dilations =
+      BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("dilations"));
+  const std::vector<int> strides =
+      BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("strides"));
+  const std::vector<int> paddings =
+      BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("paddings"));
+  std::string padding_algorithm = "EXPLICIT";
+  if (op_desc.HasAttr("padding_algorithm"))
+    padding_algorithm =
+        BOOST_GET_CONST(std::string, op_desc.GetAttr("padding_algorithm"));
+
+  nvinfer1::Dims3 nv_ksize(filter_d, filter_h, filter_w);
+  nvinfer1::Dims3 nv_dilations(dilations[0], dilations[1], dilations[2]);
+  nvinfer1::Dims3 nv_strides(strides[0], strides[1], strides[2]);
+  nvinfer1::Dims3 nv_paddings(paddings[0], paddings[1], paddings[2]);
+
+  TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
+                                static_cast<void*>(weight_data),
+                                static_cast<size_t>(Y_t->numel())};
+  float* bias_data = nullptr;
+  size_t bias_size = 0;
+
+  TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
+                              static_cast<void*>(bias_data), bias_size};
+  // In conv3d_transpose output channels = filter_dims[1] * groups
+  auto* layer = (op_desc.Type() == "conv3d_transpose")
+                    ? fadd_layer(X, n_input * groups, nv_ksize, weight, bias)
+                    : fadd_layer(X, n_output, nv_ksize, weight, bias);
+
+  PADDLE_ENFORCE_NOT_NULL(
+      layer, platform::errors::Fatal("TensorRT create conv3d/conv3d_transpose"
+                                     " layer failed."));
+  layer->setStrideNd(nv_strides);
+  layer->setPaddingNd(nv_paddings);
+  layer->setNbGroups(groups);
+  if (padding_algorithm == "SAME") {
+    layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
+  }
+  // set dilations
+  fset_dilation(layer, nv_dilations);
+
+  auto output_name = op_desc.Output("Output").front();
+  layer->setName((name + " (Output: " + output_name + ")").c_str());
+  layer->getOutput(0)->setName(output_name.c_str());
+  engine->SetITensor(output_name, layer->getOutput(0));
+
+  if (test_mode) {
+    engine->DeclareOutput(output_name);
+  }
+}
+
+class Conv3dOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    ConvertConv3d(
+        engine_, op, scope, test_mode,
+        [&](nvinfer1::ITensor* inputs, int n_output, /* Conv output maps */
+            nvinfer1::Dims& ksize, TensorRTEngine::Weight& weight,
+            TensorRTEngine::Weight& bias) -> nvinfer1::IConvolutionLayer* {
+          auto* layer =
+              TRT_ENGINE_ADD_LAYER(engine_, ConvolutionNd, *inputs, n_output,
+                                   ksize, weight.get(), bias.get());
+          return layer;
+        },
+        [](nvinfer1::IConvolutionLayer* layer, nvinfer1::Dims& dilations) {
+          layer->setDilationNd(dilations);
+        },
+        "conv3d");
+  }
+};
+
+class Deconv3dOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    ConvertConv3d(
+        engine_, op, scope, test_mode,
+        [&](nvinfer1::ITensor* inputs, int n_output, /* Deconv input maps */
+            nvinfer1::Dims& ksize, TensorRTEngine::Weight& weight,
+            TensorRTEngine::Weight& bias) -> nvinfer1::IDeconvolutionLayer* {
+          auto* layer =
+              TRT_ENGINE_ADD_LAYER(engine_, DeconvolutionNd, *inputs, n_output,
+                                   ksize, weight.get(), bias.get());
+          return layer;
+        },
+        [](nvinfer1::IDeconvolutionLayer* layer, nvinfer1::Dims& dilations) {},
+        "conv3d_transpose");
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(conv3d, Conv3dOpConverter);
+REGISTER_TRT_OP_CONVERTER(conv3d_transpose, Deconv3dOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
@@ -76,11 +76,7 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
                         "TensorRT's tensor input requires at least 1 "
                         "dimensions, but input %s has %d dims.",
                         input, shape.size()));
-  PADDLE_ENFORCE_LE(shape.size(), 4UL,
-                    platform::errors::InvalidArgument(
-                        "TensorRT's tensor input requires at most 4 "
-                        "dimensions, but input %s has %d dims.",
-                        input, shape.size()));
+
   auto ShapeStr = [](const std::vector<T>& shape) {
     std::ostringstream os;
     os << "[";
@@ -103,6 +99,14 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
             input, ShapeStr(shape)));
       }
       return nvinfer1::Dims3(shape[1], shape[2], shape[3]);
+    } else if (shape.size() == 5UL) {
+      if (shape[2] == -1 || shape[3] == -1 || shape[4] == -1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The input [%s] shape of trt subgraph is %s, please enable "
+            "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
+            input, ShapeStr(shape)));
+      }
+      return nvinfer1::Dims4(shape[1], shape[2], shape[3], shape[4]);
     } else if (shape.size() == 3UL) {
       if (shape[1] == -1 || shape[2] == -1) {
         PADDLE_THROW(platform::errors::InvalidArgument(

diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -90,51 +90,51 @@ struct SimpleOpTypeSetTeller : public Teller {
                                                   "elementwise_mul",
                                                   "conv2d_transpose",
                                                   "hard_swish"};
-  std::unordered_set<std::string> teller_set{
-      "mul",
-      "matmul",
-      "conv2d",
-      "conv2d_fusion",
-      "pool2d",
-      "relu",
-      "softmax",
-      "sigmoid",
-      "hard_swish",
-      "depthwise_conv2d",
-      "batch_norm",
-      "concat",
-      "tanh",
-      "pad",
-      "elementwise_add",
-      "elementwise_mul",
-      "dropout",
-      "prelu",
-      "conv2d_transpose",
-      "depthwise_conv2d_transpose",
-      "leaky_relu",
-      "fc",
-      "shuffle_channel",
-      "swish",
-      "split",
-      "instance_norm",
-      "gelu",
-      "layer_norm",
-      "scale",
-      "stack",
-      "transpose2",
-      "transpose",
-      "flatten2",
-      "flatten",
-      "gather",
-      "gather_nd",
-      "yolo_box",
-      "roi_align",
-      "affine_channel",
-      "nearest_interp",
-      "anchor_generator",
-      "reduce_sum",
-      "reduce_mean",
-  };
+  std::unordered_set<std::string> teller_set{"mul",
+                                             "matmul",
+                                             "conv2d",
+                                             "conv2d_fusion",
+                                             "pool2d",
+                                             "relu",
+                                             "softmax",
+                                             "sigmoid",
+                                             "hard_swish",
+                                             "depthwise_conv2d",
+                                             "batch_norm",
+                                             "concat",
+                                             "tanh",
+                                             "pad",
+                                             "elementwise_add",
+                                             "elementwise_mul",
+                                             "dropout",
+                                             "prelu",
+                                             "conv2d_transpose",
+                                             "depthwise_conv2d_transpose",
+                                             "leaky_relu",
+                                             "fc",
+                                             "shuffle_channel",
+                                             "swish",
+                                             "split",
+                                             "instance_norm",
+                                             "gelu",
+                                             "layer_norm",
+                                             "scale",
+                                             "stack",
+                                             "transpose2",
+                                             "transpose",
+                                             "flatten2",
+                                             "flatten",
+                                             "gather",
+                                             "gather_nd",
+                                             "yolo_box",
+                                             "roi_align",
+                                             "affine_channel",
+                                             "nearest_interp",
+                                             "anchor_generator",
+                                             "reduce_sum",
+                                             "reduce_mean",
+                                             "conv3d",
+                                             "conv3d_transpose"};
 };
 
 bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
@@ -758,6 +758,65 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }
 #endif
 
+    if (op_type == "conv3d" || op_type == "conv3d_transpose") {
+      if (desc.HasAttr("padding_algorithm")) {
+        std::string padding_algorithm =
+            BOOST_GET_CONST(std::string, desc.GetAttr("padding_algorithm"));
+
+        // trt error is arised if conv3d_transpose and SAME
+        if (op_type == "conv3d_transpose" && padding_algorithm == "SAME" &&
+            !with_dynamic_shape) {
+          return false;
+        }
+      }
+
+#if !IS_TRT_VERSION_GE(7000)
+      // looks like some issues with trt6.0
+      if (with_dynamic_shape) {
+        return false;
+      }
+#endif
+      std::vector<int> paddings =
+          BOOST_GET_CONST(std::vector<int>, desc.GetAttr("paddings"));
+
+      // conv3d and conv3d_transpose need padding check
+      if (paddings.size() > 3) return false;
+
+      if (desc.Input("Input").size() != 1) {
+        VLOG(3) << "TRT Conv3d expect 1 input, but got "
+                << desc.Input("Input").size() << " input.";
+        return false;
+      }
+
+      if (desc.Input("Filter").size() != 1) {
+        VLOG(3) << "TRT Conv3d expect 1 filter, but got "
+                << desc.Input("Filter").size() << " filter.";
+        return false;
+      }
+
+      if (op_type == "conv3d_transpose") {
+        if (!desc.HasAttr("dilations")) {
+          return false;
+        } else {
+          const std::vector<int> dilations =
+              BOOST_GET_CONST(std::vector<int>, desc.GetAttr("dilations"));
+          if (dilations[0] != 1 || dilations[1] != 1 || dilations[2] != 1) {
+            VLOG(3) << "In conv3d_transpose, Dilations must be (1, 1, 1) for "
+                       "tensorRT, but given ("
+                    << dilations[0] << ", " << dilations[1] << ", "
+                    << dilations[2] << ")";
+            return false;
+          }
+        }
+      }
+
+      if (desc.Output("Output").size() != 1) {
+        VLOG(3) << "TRT Conv3d expect 1 output, but got "
+                << desc.Output("Output").size() << " output.";
+        return false;
+      }
+    }
+
     if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
   }
 

diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
@@ -55,4 +55,6 @@ set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60)
 set_tests_properties(test_trt_fc_fuse_quant_dequant_pass PROPERTIES TIMEOUT 100)
 set_tests_properties(test_trt_conv_quant_dequant_pass PROPERTIES TIMEOUT 100)
 set_tests_properties(test_trt_matmul_quant_dequant PROPERTIES TIMEOUT 100)
+set_tests_properties(test_trt_conv3d_op PROPERTIES TIMEOUT 60)
+set_tests_properties(test_trt_conv3d_transpose_op PROPERTIES TIMEOUT 60)
 endif()