PaddlePaddle · jiangjiajun · Jul 26, 2022 · Jul 23, 2022 · Jul 23, 2022 · Jul 24, 2022
diff --git a/external/onnxruntime.cmake b/external/onnxruntime.cmake
@@ -27,7 +27,7 @@ set(ONNXRUNTIME_LIB_DIR
     CACHE PATH "onnxruntime lib directory." FORCE)
 set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}")
 
-set(ONNXRUNTIME_VERSION "1.11.1")
+set(ONNXRUNTIME_VERSION "1.12.0")
 set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/")
 
 if(WIN32)

diff --git a/external/paddle2onnx.cmake b/external/paddle2onnx.cmake
@@ -43,7 +43,7 @@ else()
 endif(WIN32)
 
 set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
-set(PADDLE2ONNX_VERSION "1.0.0rc1")
+set(PADDLE2ONNX_VERSION "1.0.0rc2")
 if(WIN32)
   set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
 elseif(APPLE)

diff --git a/fastdeploy/backends/backend.h b/fastdeploy/backends/backend.h
@@ -18,7 +18,7 @@
 #include <memory>
 #include <string>
 #include <vector>
-
+#include "fastdeploy/backends/common/multiclass_nms.h"
 #include "fastdeploy/core/fd_tensor.h"
 
 namespace fastdeploy {
@@ -45,4 +45,4 @@ class BaseBackend {
                      std::vector<FDTensor>* outputs) = 0;
 };
 
-} // namespace fastdeploy
+}  // namespace fastdeploy
diff --git a/fastdeploy/backends/common/multiclass_nms.cc b/fastdeploy/backends/common/multiclass_nms.cc
@@ -0,0 +1,224 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/backends/common/multiclass_nms.h"
+#include <algorithm>
+#include "fastdeploy/core/fd_tensor.h"
+#include "fastdeploy/utils/utils.h"
+
+namespace fastdeploy {
+namespace backend {
+// Ordering predicate for (score, payload) pairs: true when `pair1` has the
+// strictly larger score, so sorting with it yields descending scores. The
+// payload of type T never takes part in the comparison.
+template <class T>
+bool SortScorePairDescend(const std::pair<float, T>& pair1,
+                          const std::pair<float, T>& pair2) {
+  const float lhs_score = pair1.first;
+  const float rhs_score = pair2.first;
+  return rhs_score < lhs_score;
+}
+
+// Collects every score strictly greater than `threshold` as a
+// (score, index) pair, appends the pairs to `*sorted_indices`, stable-sorts
+// the whole vector by descending score and finally truncates it to `top_k`
+// entries when `top_k` is non-negative and smaller than the vector size.
+//
+//   scores          pointer to at least `score_size` values
+//   score_size      number of entries to scan; a value <= 0 scans nothing
+//   threshold       exclusive lower bound for accepted scores
+//   top_k           maximum number of pairs kept; -1 (or less) keeps all
+//   sorted_indices  output vector; entries already present are preserved
+//                   and take part in the sort and the truncation
+void GetMaxScoreIndex(const float* scores, const int& score_size,
+                      const float& threshold, const int& top_k,
+                      std::vector<std::pair<float, int>>* sorted_indices) {
+  // Use a signed counter: comparing a size_t counter with a negative
+  // score_size converts the bound to a huge unsigned value and the loop
+  // would read far beyond the end of `scores`.
+  for (int i = 0; i < score_size; ++i) {
+    if (scores[i] > threshold) {
+      sorted_indices->emplace_back(scores[i], i);
+    }
+  }
+  // stable_sort keeps pairs with equal scores in their insertion order.
+  std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
+                   SortScorePairDescend<int>);
+  // Keep only the top_k best scores if a limit was requested.
+  if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
+    sorted_indices->resize(top_k);
+  }
+}
+
+// Area of a box stored as four floats [xmin, ymin, xmax, ymax].
+// A box with xmax < xmin or ymax < ymin is treated as invalid and has area 0.
+// When `normalized` is false, one is added to both the width and the height
+// before they are multiplied.
+float BBoxArea(const float* box, const bool& normalized) {
+  const bool inverted = box[2] < box[0] || box[3] < box[1];
+  if (inverted) {
+    return 0.f;
+  }
+  const float width = box[2] - box[0];
+  const float height = box[3] - box[1];
+  if (!normalized) {
+    return (width + 1) * (height + 1);
+  }
+  return width * height;
+}
+
+// Intersection-over-union of two boxes stored as [xmin, ymin, xmax, ymax].
+// Returns 0 when the boxes do not touch on either axis. When `normalized`
+// is false, one is added to the intersection width and height, matching the
+// convention BBoxArea applies to the individual box areas.
+float JaccardOverlap(const float* box1, const float* box2,
+                     const bool& normalized) {
+  const bool disjoint = box2[0] > box1[2] || box2[2] < box1[0] ||
+                        box2[1] > box1[3] || box2[3] < box1[1];
+  if (disjoint) {
+    return 0.f;
+  }
+  const float pad = normalized ? 0.0f : 1.0f;
+  const float left = std::max(box1[0], box2[0]);
+  const float top = std::max(box1[1], box2[1]);
+  const float right = std::min(box1[2], box2[2]);
+  const float bottom = std::min(box1[3], box2[3]);
+  const float overlap_w = right - left + pad;
+  const float overlap_h = bottom - top + pad;
+  const float overlap_area = overlap_w * overlap_h;
+  const float area1 = BBoxArea(box1, normalized);
+  const float area2 = BBoxArea(box2, normalized);
+  // union = area1 + area2 - intersection
+  return overlap_area / (area1 + area2 - overlap_area);
+}
+
+// Greedy non-maximum suppression for the scores of a single class.
+//
+// Candidates are the boxes whose score exceeds `score_threshold` (limited to
+// the `nms_top_k` best ones when that value is non-negative); they are
+// visited from the highest to the lowest score. A candidate is appended to
+// `*keep_indices` only if its overlap with every box already in
+// `*keep_indices` is <= the current threshold. The threshold starts at
+// `nms_threshold` and, after each kept box, is multiplied by `nms_eta` as
+// long as `nms_eta` < 1 and the threshold is still above 0.5.
+//
+//   boxes         box coordinates, 4 floats per box
+//   scores        `num_boxes` scores of the class being processed
+//   num_boxes     number of boxes / scores
+//   keep_indices  output; indices of kept boxes are appended in visiting
+//                 order. Entries already present also suppress candidates.
+void MultiClassNMS::FastNMS(const float* boxes, const float* scores,
+                            const int& num_boxes,
+                            std::vector<int>* keep_indices) {
+  std::vector<std::pair<float, int>> sorted_indices;
+  GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k,
+                   &sorted_indices);
+
+  float adaptive_threshold = nms_threshold;
+  // Walk the sorted candidates in place. Erasing the front element of the
+  // vector on every iteration would shift all remaining elements each time.
+  for (const auto& candidate : sorted_indices) {
+    const int idx = candidate.second;
+    bool keep = true;
+    for (const int kept_idx : *keep_indices) {
+      const float overlap =
+          JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized);
+      keep = overlap <= adaptive_threshold;
+      if (!keep) {
+        break;  // one sufficiently overlapping kept box is enough
+      }
+    }
+    if (!keep) {
+      continue;
+    }
+    keep_indices->push_back(idx);
+    if (nms_eta < 1.0 && adaptive_threshold > 0.5) {
+      adaptive_threshold *= nms_eta;
+    }
+  }
+}
+
+// Runs FastNMS for every class of one sample and optionally trims the
+// combined result to the `keep_top_k` highest scoring detections.
+//
+//   boxes         box coordinates of the sample, 4 floats per box
+//   scores        class-major scores: class c starts at scores + c * num_boxes
+//   num_boxes     number of boxes in the sample
+//   num_classes   number of classes; `background_label` is skipped
+//   keep_indices  output map: class label -> indices of the kept boxes
+//
+// Returns the total number of detections left in `*keep_indices`.
+int MultiClassNMS::NMSForEachSample(
+    const float* boxes, const float* scores, int num_boxes, int num_classes,
+    std::map<int, std::vector<int>>* keep_indices) {
+  for (int cls = 0; cls < num_classes; ++cls) {
+    if (cls != background_label) {
+      const float* class_scores = scores + cls * num_boxes;
+      FastNMS(boxes, class_scores, num_boxes, &((*keep_indices)[cls]));
+    }
+  }
+
+  int num_det = 0;
+  for (const auto& entry : *keep_indices) {
+    num_det += entry.second.size();
+  }
+
+  const bool over_limit = keep_top_k > -1 && num_det > keep_top_k;
+  if (!over_limit) {
+    return num_det;
+  }
+
+  // Flatten all detections into (score, (label, box index)) records so they
+  // can be ranked across classes.
+  std::vector<std::pair<float, std::pair<int, int>>> ranked;
+  for (const auto& entry : *keep_indices) {
+    const int label = entry.first;
+    const float* class_scores = scores + label * num_boxes;
+    for (const int box_idx : entry.second) {
+      ranked.push_back(
+          std::make_pair(class_scores[box_idx], std::make_pair(label, box_idx)));
+    }
+  }
+  std::stable_sort(ranked.begin(), ranked.end(),
+                   SortScorePairDescend<std::pair<int, int>>);
+  ranked.resize(keep_top_k);
+
+  // Regroup the survivors per label and hand them back to the caller.
+  std::map<int, std::vector<int>> trimmed;
+  for (const auto& record : ranked) {
+    trimmed[record.second.first].push_back(record.second.second);
+  }
+  keep_indices->swap(trimmed);
+  num_det = keep_top_k;
+  return num_det;
+}
+
+// Runs multiclass NMS over a whole batch and stores the result in the
+// member vectors:
+//   out_num_rois_data  one entry per sample: number of detections kept
+//   out_box_data       6 floats per detection:
+//                      [label, score, box[0], box[1], box[2], box[3]]
+//   out_index_data     one entry per detection: sample * boxes_dim[1] + index
+//                      of the box inside its sample
+// When nothing is detected, out_box_data and out_index_data are left empty.
+//
+//   boxes_data   box coordinates; boxes_dim must have rank 3 with
+//                boxes_dim[2] == 4, boxes_dim[1] is the boxes per sample
+//   scores_data  scores; scores_dim must have rank 3, scores_dim[0] is the
+//                batch size, scores_dim[1] the number of classes and
+//                scores_dim[2] the stride between two classes
+void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data,
+                            const std::vector<int64_t>& boxes_dim,
+                            const std::vector<int64_t>& scores_dim) {
+  // Validate the ranks before any dimension is indexed.
+  const int score_size = scores_dim.size();
+  FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " +
+                                std::to_string(score_size) + ".");
+  FDASSERT(boxes_dim.size() == 3,
+           "Require rank of input boxes be 3, but now it's " +
+               std::to_string(boxes_dim.size()) + ".");
+  FDASSERT(boxes_dim[2] == 4,
+           "Require the 3-dimension of input boxes be 4, but now it's " +
+               std::to_string(boxes_dim[2]) + ".");
+
+  const int64_t batch_size = scores_dim[0];
+  const int64_t boxes_per_sample = boxes_dim[1] * boxes_dim[2];
+  const int64_t scores_per_sample = scores_dim[1] * scores_dim[2];
+
+  // Reset every output so that no result of an earlier call survives.
+  out_num_rois_data.assign(batch_size, 0);
+  out_box_data.clear();
+  out_index_data.clear();
+
+  // Per sample: class label -> indices of the boxes kept for that class.
+  // NMSForEachSample fills the maps in place, which avoids copying them.
+  std::vector<std::map<int, std::vector<int>>> all_indices(batch_size);
+  int num_nmsed_out = 0;
+  for (int64_t i = 0; i < batch_size; ++i) {
+    const int num = NMSForEachSample(
+        boxes_data + i * boxes_per_sample, scores_data + i * scores_per_sample,
+        boxes_dim[1], scores_dim[1], &all_indices[i]);
+    out_num_rois_data[i] = num;
+    num_nmsed_out += num;
+  }
+  if (num_nmsed_out == 0) {
+    return;
+  }
+  out_box_data.resize(num_nmsed_out * 6);
+  out_index_data.resize(num_nmsed_out);
+
+  int count = 0;
+  for (int64_t i = 0; i < batch_size; ++i) {
+    const float* sample_boxes = boxes_data + i * boxes_per_sample;
+    const float* sample_scores = scores_data + i * scores_per_sample;
+    for (const auto& it : all_indices[i]) {
+      const int label = it.first;
+      const float* class_scores = sample_scores + label * scores_dim[2];
+      for (const int box_idx : it.second) {
+        float* row = out_box_data.data() + count * 6;
+        const float* box = sample_boxes + box_idx * 4;
+        row[0] = label;
+        row[1] = class_scores[box_idx];
+        row[2] = box[0];
+        row[3] = box[1];
+        row[4] = box[2];
+        row[5] = box[3];
+        out_index_data[count] = i * boxes_dim[1] + box_idx;
+        ++count;
+      }
+    }
+  }
+}
+}  // namespace backend
+}  // namespace fastdeploy
diff --git a/fastdeploy/backends/common/multiclass_nms.h b/fastdeploy/backends/common/multiclass_nms.h
@@ -0,0 +1,45 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <cstdint>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace fastdeploy {
+namespace backend {
+// Multiclass non-maximum suppression: configuration, result buffers and the
+// routines that fill them. Every option has an initializer so that a
+// default-constructed object never holds indeterminate values.
+struct MultiClassNMS {
+  // Class index that NMSForEachSample skips; -1 skips no class.
+  int64_t background_label = -1;
+  // Maximum number of detections kept per sample; -1 means no limit.
+  int64_t keep_top_k = -1;
+  // Factor applied to the overlap threshold after each kept box; it only
+  // takes effect when it is < 1, so 1.0 keeps the threshold constant.
+  float nms_eta = 1.0f;
+  // Largest overlap a candidate may have with a kept box and still be kept.
+  float nms_threshold = 0.7f;
+  // Maximum number of candidates per class entering NMS; -1 means no limit.
+  int64_t nms_top_k = -1;
+  // When false, one is added to box widths and heights in area computations.
+  bool normalized = true;
+  // Only scores strictly greater than this value become candidates.
+  float score_threshold = 0.0f;
+
+  // Results of the last Compute() call.
+  std::vector<int32_t> out_num_rois_data;  // detections per sample
+  std::vector<int32_t> out_index_data;     // one box index per detection
+  std::vector<float> out_box_data;         // 6 floats per detection
+
+  // Greedy NMS over the scores of one class; appends to `keep_indices`.
+  void FastNMS(const float* boxes, const float* scores, const int& num_boxes,
+               std::vector<int>* keep_indices);
+  // NMS over all classes of one sample; returns the number of detections.
+  int NMSForEachSample(const float* boxes, const float* scores, int num_boxes,
+                       int num_classes,
+                       std::map<int, std::vector<int>>* keep_indices);
+  // NMS over a whole batch; fills the out_* members.
+  void Compute(const float* boxes, const float* scores,
+               const std::vector<int64_t>& boxes_dim,
+               const std::vector<int64_t>& scores_dim);
+};
+}  // namespace backend
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/backends/ort/ops/multiclass_nms.cc b/fastdeploy/backends/ort/ops/multiclass_nms.cc
@@ -253,8 +253,5 @@ void MultiClassNmsKernel::GetAttribute(const OrtKernelInfo* info) {
   nms_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "nms_top_k");
   normalized = ort_.KernelInfoGetAttribute<int64_t>(info, "normalized");
   score_threshold = ort_.KernelInfoGetAttribute<float>(info, "score_threshold");
-  std::cout << background_label << " " << keep_top_k << " " << nms_eta << " "
-            << nms_threshold << " " << nms_top_k << " " << normalized << " "
-            << score_threshold << " " << std::endl;
 }
 }  // namespace fastdeploy
diff --git a/fastdeploy/backends/ort/ort_backend.cc b/fastdeploy/backends/ort/ort_backend.cc
@@ -107,16 +107,26 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
 #ifdef ENABLE_PADDLE_FRONTEND
   char* model_content_ptr;
   int model_content_size = 0;
+
+  std::vector<paddle2onnx::CustomOp> custom_ops;
+  for (auto& item : option.custom_op_info_) {
+    paddle2onnx::CustomOp op;
+    strcpy(op.op_name, item.first.c_str());
+    strcpy(op.export_op_name, item.second.c_str());
+    custom_ops.emplace_back(op);
+  }
   if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                            &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true)) {
+                           verbose, true, true, true, custom_ops.data(),
+                           custom_ops.size())) {
     FDERROR << "Error occured while export PaddlePaddle to ONNX format."
             << std::endl;
     return false;
   }
+
   std::string onnx_model_proto(model_content_ptr,
                                model_content_ptr + model_content_size);
-  delete model_content_ptr;
+  delete[] model_content_ptr;
   model_content_ptr = nullptr;
   return InitFromOnnx(onnx_model_proto, option, true);
 #else

diff --git a/fastdeploy/backends/ort/ort_backend.h b/fastdeploy/backends/ort/ort_backend.h
@@ -44,6 +44,10 @@ struct OrtBackendOption {
   int execution_mode = -1;
   bool use_gpu = false;
   int gpu_id = 0;
+
+  // inside parameter, maybe remove next version
+  bool remove_multiclass_nms_ = false;
+  std::map<std::string, std::string> custom_op_info_;
 };
 
 class OrtBackend : public BaseBackend {

diff --git a/fastdeploy/backends/tensorrt/trt_backend.cc b/fastdeploy/backends/tensorrt/trt_backend.cc
@@ -162,18 +162,41 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
   }
 
 #ifdef ENABLE_PADDLE_FRONTEND
+  std::vector<paddle2onnx::CustomOp> custom_ops;
+  for (auto& item : option.custom_op_info_) {
+    paddle2onnx::CustomOp op;
+    std::strcpy(op.op_name, item.first.c_str());
+    std::strcpy(op.export_op_name, item.second.c_str());
+    custom_ops.emplace_back(op);
+  }
   char* model_content_ptr;
   int model_content_size = 0;
   if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                            &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true)) {
+                           verbose, true, true, true, custom_ops.data(),
+                           custom_ops.size())) {
     FDERROR << "Error occured while export PaddlePaddle to ONNX format."
             << std::endl;
     return false;
   }
+
+  if (option.remove_multiclass_nms_) {
+    char* new_model = nullptr;
+    int new_model_size = 0;
+    if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size,
+                                          &new_model, &new_model_size)) {
+      FDERROR << "Try to remove MultiClassNMS failed." << std::endl;
+      return false;
+    }
+    delete[] model_content_ptr;
+    std::string onnx_model_proto(new_model, new_model + new_model_size);
+    delete[] new_model;
+    return InitFromOnnx(onnx_model_proto, option, true);
+  }
+
   std::string onnx_model_proto(model_content_ptr,
                                model_content_ptr + model_content_size);
-  delete model_content_ptr;
+  delete[] model_content_ptr;
   model_content_ptr = nullptr;
   return InitFromOnnx(onnx_model_proto, option, true);
 #else