From 7f076e7e27d1fbdd040364b0297b1550720fc9ea Mon Sep 17 00:00:00 2001
From: jiangjiajun
Date: Sat, 23 Jul 2022 14:17:45 +0000
Subject: [PATCH 1/6] Add custom operator for onnxruntime and fix paddle backend

---
 ThirdPartyNotices.txt                         | 209 ++++++++++++++
 external/paddle_inference.cmake               |   7 +-
 external/utils.cmake                          |  13 +
 fastdeploy/backends/ort/ops/multiclass_nms.cc | 260 ++++++++++++++++++
 fastdeploy/backends/ort/ops/multiclass_nms.h  |  76 +++++
 fastdeploy/backends/ort/ort_backend.cc        |  21 +-
 fastdeploy/backends/ort/ort_backend.h         |   9 +-
 fastdeploy/backends/paddle/util.cc            |   1 +
 fastdeploy/fastdeploy_model.cc                |   2 +-
 fastdeploy/fastdeploy_runtime.cc              |  29 +-
 fastdeploy/fastdeploy_runtime.h               |   9 +-
 setup.py                                      |  90 +++---
 12 files changed, 666 insertions(+), 60 deletions(-)
 create mode 100644 fastdeploy/backends/ort/ops/multiclass_nms.cc
 create mode 100644 fastdeploy/backends/ort/ops/multiclass_nms.h

diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt
index 5842b9a7179..fa9df0fbab5 100644
--- a/ThirdPartyNotices.txt
+++ b/ThirdPartyNotices.txt
@@ -732,3 +732,212 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

---------
7. https://github.com/oneapi-src/oneDNN/

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + ============================================================================ + + Copyright 2016-2021 Intel Corporation + Copyright 2018 YANDEX LLC + Copyright 2019-2021 FUJITSU LIMITED + Copyright 2020 Arm Limited and affiliates + Copyright 2020 Codeplay Software Limited + Copyright 2021 Alanna Tempest + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + This distribution includes third party software ("third party programs"). + This third party software, even if included with the distribution of + the Intel software, may be governed by separate license terms, including + without limitation, third party license terms, other Intel software license + terms, and open source software license terms. These separate license terms + govern your use of the third party programs as set forth in the + "THIRD-PARTY-PROGRAMS" file. 
diff --git a/external/paddle_inference.cmake b/external/paddle_inference.cmake index 41aa740f622..8894209f4a2 100644 --- a/external/paddle_inference.cmake +++ b/external/paddle_inference.cmake @@ -83,12 +83,7 @@ ExternalProject_Add( BUILD_COMMAND "" UPDATE_COMMAND "" INSTALL_COMMAND - ${CMAKE_COMMAND} -E remove_directory ${PADDLEINFERENCE_INSTALL_DIR} && - ${CMAKE_COMMAND} -E make_directory ${PADDLEINFERENCE_INSTALL_DIR} && - ${CMAKE_COMMAND} -E rename ${PADDLEINFERENCE_SOURCE_DIR}/paddle/ - ${PADDLEINFERENCE_INSTALL_DIR}/paddle && ${CMAKE_COMMAND} -E rename - ${PADDLEINFERENCE_SOURCE_DIR}/third_party ${PADDLEINFERENCE_INSTALL_DIR}/third_party && - ${CMAKE_COMMAND} -E rename ${PADDLEINFERENCE_SOURCE_DIR}/version.txt ${PADDLEINFERENCE_INSTALL_DIR}/version.txt + ${CMAKE_COMMAND} -E copy_directory ${PADDLEINFERENCE_SOURCE_DIR} ${PADDLEINFERENCE_INSTALL_DIR} BUILD_BYPRODUCTS ${PADDLEINFERENCE_COMPILE_LIB}) add_library(external_paddle_inference STATIC IMPORTED GLOBAL) diff --git a/external/utils.cmake b/external/utils.cmake index 3e6d70b42d6..f5d69720423 100644 --- a/external/utils.cmake +++ b/external/utils.cmake @@ -13,3 +13,16 @@ function(redefine_file_macro targetname) ) endforeach() endfunction() + +function(download_and_decompress url filename decompress_dir) + if(NOT EXISTS ${filename}) + message("Downloading file from ${url} ...") + file(DOWNLOAD ${url} "${filename}.tmp" SHOW_PROGRESS) + file(RENAME "${filename}.tmp" ${filename}) + endif() + if(NOT EXISTS ${decompress_dir}) + file(MAKE_DIRECTORY ${decompress_dir}) + endif() + message("Decompress file ${filename} ...") + execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xf ${filename} WORKING_DIRECTORY ${decompress_dir}) +endfunction() diff --git a/fastdeploy/backends/ort/ops/multiclass_nms.cc b/fastdeploy/backends/ort/ops/multiclass_nms.cc new file mode 100644 index 00000000000..8c00dc7bede --- /dev/null +++ b/fastdeploy/backends/ort/ops/multiclass_nms.cc @@ -0,0 +1,260 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
+#include <algorithm>
+#include "fastdeploy/core/fd_tensor.h"
+#include "fastdeploy/utils/utils.h"
+
+namespace fastdeploy {
+
+struct OrtTensorDimensions : std::vector<int64_t> {
+  OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
+    OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
+    std::vector<int64_t>::operator=(ort.GetTensorShape(info));
+    ort.ReleaseTensorTypeAndShapeInfo(info);
+  }
+};
+
+template <class T>
+bool SortScorePairDescend(const std::pair<float, T>& pair1,
+                          const std::pair<float, T>& pair2) {
+  return pair1.first > pair2.first;
+}
+
+void GetMaxScoreIndex(const float* scores, const int& score_size,
+                      const float& threshold, const int& top_k,
+                      std::vector<std::pair<float, int>>* sorted_indices) {
+  for (size_t i = 0; i < score_size; ++i) {
+    if (scores[i] > threshold) {
+      sorted_indices->push_back(std::make_pair(scores[i], i));
+    }
+  }
+  // Sort the score pair according to the scores in descending order
+  std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
+                   SortScorePairDescend<int>);
+  // Keep top_k scores if needed.
+  if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
+    sorted_indices->resize(top_k);
+  }
+}
+
+float BBoxArea(const float* box, const bool& normalized) {
+  if (box[2] < box[0] || box[3] < box[1]) {
+    // If coordinate values are invalid
+    // (e.g. xmax < xmin or ymax < ymin), return 0.
+    return 0.f;
+  } else {
+    const float w = box[2] - box[0];
+    const float h = box[3] - box[1];
+    if (normalized) {
+      return w * h;
+    } else {
+      // If coordinate values are not within range [0, 1].
+      return (w + 1) * (h + 1);
+    }
+  }
+}
+
+float JaccardOverlap(const float* box1, const float* box2,
+                     const bool& normalized) {
+  if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
+      box2[3] < box1[1]) {
+    return 0.f;
+  } else {
+    const float inter_xmin = std::max(box1[0], box2[0]);
+    const float inter_ymin = std::max(box1[1], box2[1]);
+    const float inter_xmax = std::min(box1[2], box2[2]);
+    const float inter_ymax = std::min(box1[3], box2[3]);
+    float norm = normalized ? 0.0f : 1.0f;
+    float inter_w = inter_xmax - inter_xmin + norm;
+    float inter_h = inter_ymax - inter_ymin + norm;
+    const float inter_area = inter_w * inter_h;
+    const float bbox1_area = BBoxArea(box1, normalized);
+    const float bbox2_area = BBoxArea(box2, normalized);
+    return inter_area / (bbox1_area + bbox2_area - inter_area);
+  }
+}
+
+void MultiClassNmsKernel::FastNMS(const float* boxes, const float* scores,
+                                  const int& num_boxes,
+                                  std::vector<int>* keep_indices) {
+  std::vector<std::pair<float, int>> sorted_indices;
+  GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k,
+                   &sorted_indices);
+
+  float adaptive_threshold = nms_threshold;
+  while (sorted_indices.size() != 0) {
+    const int idx = sorted_indices.front().second;
+    bool keep = true;
+    for (size_t k = 0; k < keep_indices->size(); ++k) {
+      if (!keep) {
+        break;
+      }
+      const int kept_idx = (*keep_indices)[k];
+      float overlap =
+          JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized);
+      keep = overlap <= adaptive_threshold;
+    }
+    if (keep) {
+      keep_indices->push_back(idx);
+    }
+    sorted_indices.erase(sorted_indices.begin());
+    if (keep && nms_eta < 1.0 && adaptive_threshold > 0.5) {
+      adaptive_threshold *= nms_eta;
+    }
+  }
+}
+
+int MultiClassNmsKernel::NMSForEachSample(
+    const float* boxes, const float* scores, int num_boxes, int num_classes,
+    std::map<int, std::vector<int>>* keep_indices) {
+  for (int i = 0; i < num_classes; ++i) {
+    if (i == background_label) {
+      continue;
+    }
+    const float* score_for_class_i = scores + i * num_boxes;
+    FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i]));
+  }
+  int num_det = 0;
+  for (auto iter = keep_indices->begin(); iter != keep_indices->end();
+       ++iter) {
+    num_det += iter->second.size();
+  }
+
+  if (keep_top_k > -1 && num_det > keep_top_k) {
+    std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
+    for (const auto& it : *keep_indices) {
+      int label = it.first;
+      const float* current_score = scores + label * num_boxes;
+      auto& label_indices = it.second;
+      for (size_t j = 0; j < label_indices.size(); ++j) {
+        int idx = label_indices[j];
+        score_index_pairs.push_back(
+            std::make_pair(current_score[idx], std::make_pair(label, idx)));
+      }
+    }
+    std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
+                     SortScorePairDescend<std::pair<int, int>>);
+    score_index_pairs.resize(keep_top_k);
+
+    std::map<int, std::vector<int>> new_indices;
+    for (size_t j = 0; j < score_index_pairs.size(); ++j) {
+      int label = score_index_pairs[j].second.first;
+      int idx = score_index_pairs[j].second.second;
+      new_indices[label].push_back(idx);
+    }
+    new_indices.swap(*keep_indices);
+    num_det = keep_top_k;
+  }
+  return num_det;
+}
+
+void MultiClassNmsKernel::Compute(OrtKernelContext* context) {
+  const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
+  const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
+  const float* boxes_data =
+      reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
+  const float* scores_data =
+      reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
+  OrtTensorDimensions boxes_dim(ort_, boxes);
+  OrtTensorDimensions scores_dim(ort_, scores);
+  int score_size = scores_dim.size();
+
+  int64_t batch_size = scores_dim[0];
+  int64_t box_dim = boxes_dim[2];
+  int64_t out_dim = box_dim + 2;
+
+  int num_nmsed_out = 0;
+  FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " +
+                                std::to_string(score_size) + ".");
+  FDASSERT(boxes_dim[2] == 4,
+           "Require the 3-dimension of input boxes be 4, but now it's " +
+               std::to_string(boxes_dim[2]) + ".");
+  std::vector<int64_t> out_num_rois_dims = {batch_size};
+  OrtValue* out_num_rois = ort_.KernelContext_GetOutput(
+      context, 2, out_num_rois_dims.data(), out_num_rois_dims.size());
+  int32_t* out_num_rois_data =
+      ort_.GetTensorMutableData<int32_t>(out_num_rois);
+
+  std::vector<std::map<int, std::vector<int>>> all_indices;
+  for (size_t i = 0; i < batch_size; ++i) {
+    std::map<int, std::vector<int>> indices;  // indices kept for each class
+    const float* current_boxes_ptr =
+        boxes_data + i * boxes_dim[1] * boxes_dim[2];
+    const float* current_scores_ptr =
+        scores_data + i * scores_dim[1] * scores_dim[2];
+    int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr,
+                               boxes_dim[1], scores_dim[1], &indices);
+    num_nmsed_out += num;
+    out_num_rois_data[i] = num;
+    all_indices.emplace_back(indices);
+  }
+  std::vector<int64_t> out_box_dims = {num_nmsed_out, 6};
+  std::vector<int64_t> out_index_dims = {num_nmsed_out, 1};
+  OrtValue* out_box = ort_.KernelContext_GetOutput(
+      context, 0, out_box_dims.data(), out_box_dims.size());
+  OrtValue* out_index = ort_.KernelContext_GetOutput(
+      context, 1, out_index_dims.data(), out_index_dims.size());
+  if (num_nmsed_out == 0) {
+    int32_t* out_num_rois_data =
+        ort_.GetTensorMutableData<int32_t>(out_num_rois);
+    for (size_t i = 0; i < batch_size; ++i) {
+      out_num_rois_data[i] = 0;
+    }
+    return;
+  }
+  float* out_box_data = ort_.GetTensorMutableData<float>(out_box);
+  int32_t* out_index_data = ort_.GetTensorMutableData<int32_t>(out_index);
+
+  int count = 0;
+  for (size_t i = 0; i < batch_size; ++i) {
+    const float* current_boxes_ptr =
+        boxes_data + i * boxes_dim[1] * boxes_dim[2];
+    const float* current_scores_ptr =
+        scores_data + i * scores_dim[1] * scores_dim[2];
+    for (const auto& it : all_indices[i]) {
+      int label = it.first;
+      const auto& indices = it.second;
+      const float* current_scores_class_ptr =
+          current_scores_ptr + label * scores_dim[2];
+      for (size_t j = 0; j < indices.size(); ++j) {
+        int start = count * 6;
+        out_box_data[start] = label;
+        out_box_data[start + 1] = current_scores_class_ptr[indices[j]];
+        out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4];
+        out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1];
+        out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2];
+        out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3];
+        out_index_data[count] = i * boxes_dim[1] + indices[j];
+        count += 1;
+      }
+    }
+  }
+}
+
+void MultiClassNmsKernel::GetAttribute(const OrtKernelInfo* info) {
+  background_label =
+      ort_.KernelInfoGetAttribute<int64_t>(info, "background_label");
+  keep_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "keep_top_k");
+  nms_eta = ort_.KernelInfoGetAttribute<float>(info, "nms_eta");
+  nms_threshold = ort_.KernelInfoGetAttribute<float>(info, "nms_threshold");
+  nms_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "nms_top_k");
+  normalized = ort_.KernelInfoGetAttribute<int64_t>(info, "normalized");
+  score_threshold =
+      ort_.KernelInfoGetAttribute<float>(info, "score_threshold");
+  std::cout << background_label << " " << keep_top_k << " " << nms_eta << " "
+            << nms_threshold << " " << nms_top_k << " " << normalized << " "
+            << score_threshold << " " << std::endl;
+}
+}  // namespace fastdeploy
diff --git a/fastdeploy/backends/ort/ops/multiclass_nms.h b/fastdeploy/backends/ort/ops/multiclass_nms.h
new file mode 100644
index 00000000000..78f9a225576
--- /dev/null
+++ b/fastdeploy/backends/ort/ops/multiclass_nms.h
@@ -0,0 +1,76 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <map>
+#include "onnxruntime_cxx_api.h"  // NOLINT
+
+namespace fastdeploy {
+
+struct MultiClassNmsKernel {
+ protected:
+  int64_t background_label = -1;
+  int64_t keep_top_k = -1;
+  float nms_eta;
+  float nms_threshold = 0.7;
+  int64_t nms_top_k;
+  bool normalized;
+  float score_threshold;
+  Ort::CustomOpApi ort_;
+
+ public:
+  MultiClassNmsKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
+      : ort_(ort) {
+    GetAttribute(info);
+  }
+
+  void GetAttribute(const OrtKernelInfo* info);
+
+  void Compute(OrtKernelContext* context);
+  void FastNMS(const float* boxes, const float* scores, const int& num_boxes,
+               std::vector<int>* keep_indices);
+  int NMSForEachSample(const float* boxes, const float* scores, int num_boxes,
+                       int num_classes,
+                       std::map<int, std::vector<int>>* keep_indices);
+};
+
+struct MultiClassNmsOp
+    : Ort::CustomOpBase<MultiClassNmsOp, MultiClassNmsKernel> {
+  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
+    return new MultiClassNmsKernel(api, info);
+  }
+
+  const char* GetName() const { return "MultiClassNMS"; }
+
+  size_t GetInputTypeCount() const { return 2; }
+
+  ONNXTensorElementDataType GetInputType(size_t index) const {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
+  }
+
+  size_t GetOutputTypeCount() const { return 3; }
+
+  ONNXTensorElementDataType GetOutputType(size_t index) const {
+    if (index == 0) {
+      return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
+    }
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
+  }
+
+  const char* GetExecutionProviderType() const {
+    return "CPUExecutionProvider";
+  }
+};
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/backends/ort/ort_backend.cc b/fastdeploy/backends/ort/ort_backend.cc
index 7060b758c1f..909b5f28752 100644
--- a/fastdeploy/backends/ort/ort_backend.cc
+++ b/fastdeploy/backends/ort/ort_backend.cc
@@ -13,15 +13,19 @@
 // limitations under the License.
#include "fastdeploy/backends/ort/ort_backend.h" +#include +#include "fastdeploy/backends/ort/ops/multiclass_nms.h" #include "fastdeploy/backends/ort/utils.h" #include "fastdeploy/utils/utils.h" -#include #ifdef ENABLE_PADDLE_FRONTEND #include "paddle2onnx/converter.h" #endif namespace fastdeploy { +std::vector OrtBackend::custom_operators_ = + std::vector(); + ONNXTensorElementDataType GetOrtDtype(FDDataType fd_dtype) { if (fd_dtype == FDDataType::FP32) { return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; @@ -131,7 +135,9 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file, << std::endl; return false; } + BuildOption(option); + InitCustomOperators(); if (from_memory_buffer) { session_ = {env_, model_file.data(), model_file.size(), session_options_}; } else { @@ -275,4 +281,15 @@ TensorInfo OrtBackend::GetOutputInfo(int index) { return info; } -} // namespace fastdeploy +void OrtBackend::InitCustomOperators() { + if (custom_operators_.size() == 0) { + MultiClassNmsOp* custom_op = new MultiClassNmsOp{}; + custom_operators_.push_back(custom_op); + } + for (size_t i = 0; i < custom_operators_.size(); ++i) { + custom_op_domain_.Add(custom_operators_[i]); + } + session_options_.Add(custom_op_domain_); +} + +} // namespace fastdeploy diff --git a/fastdeploy/backends/ort/ort_backend.h b/fastdeploy/backends/ort/ort_backend.h index 3200c293523..8556763e0b5 100644 --- a/fastdeploy/backends/ort/ort_backend.h +++ b/fastdeploy/backends/ort/ort_backend.h @@ -20,7 +20,7 @@ #include #include "fastdeploy/backends/backend.h" -#include "onnxruntime_cxx_api.h" // NOLINT +#include "onnxruntime_cxx_api.h" // NOLINT namespace fastdeploy { @@ -68,6 +68,8 @@ class OrtBackend : public BaseBackend { TensorInfo GetInputInfo(int index); TensorInfo GetOutputInfo(int index); + static std::vector custom_operators_; + void InitCustomOperators(); private: Ort::Env env_; @@ -76,9 +78,8 @@ class OrtBackend : public BaseBackend { std::shared_ptr binding_; std::vector inputs_desc_; std::vector outputs_desc_; - + Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle"); OrtBackendOption option_; - void CopyToCpu(const Ort::Value& value, FDTensor* tensor); }; -} // namespace fastdeploy +} // namespace fastdeploy diff --git a/fastdeploy/backends/paddle/util.cc b/fastdeploy/backends/paddle/util.cc index 2469596aed4..b2df989d4a0 100644 --- a/fastdeploy/backends/paddle/util.cc +++ b/fastdeploy/backends/paddle/util.cc @@ -17,6 +17,7 @@ namespace fastdeploy { void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor) { std::vector shape(fd_tensor.shape.begin(), fd_tensor.shape.end()); + tensor->Reshape(shape); if (fd_tensor.dtype == FDDataType::FP32) { tensor->ShareExternalData(static_cast(fd_tensor.Data()), shape, paddle_infer::PlaceType::kCPU); diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc index 97a5d9bc45a..c61eea7cb6e 100644 --- a/fastdeploy/fastdeploy_model.cc +++ b/fastdeploy/fastdeploy_model.cc @@ -18,7 +18,7 @@ namespace fastdeploy { bool FastDeployModel::InitRuntime() { FDASSERT( - ModelFormatCheck(runtime_option.model_file, runtime_option.model_format), + CheckModelFormat(runtime_option.model_file, runtime_option.model_format), "ModelFormatCheck Failed."); if (runtime_initialized_) { FDERROR << "The model is already initialized, cannot be initliazed again." 
diff --git a/fastdeploy/fastdeploy_runtime.cc b/fastdeploy/fastdeploy_runtime.cc
index 6ee9fb3a850..e353c641676 100644
--- a/fastdeploy/fastdeploy_runtime.cc
+++ b/fastdeploy/fastdeploy_runtime.cc
@@ -72,7 +72,7 @@ std::string Str(const Frontend& f) {
   return "UNKNOWN-Frontend";
 }
 
-bool ModelFormatCheck(const std::string& model_file,
+bool CheckModelFormat(const std::string& model_file,
                       const Frontend& model_format) {
   if (model_format == Frontend::PADDLE) {
     if (model_file.size() < 8 ||
@@ -99,8 +99,28 @@ bool ModelFormatCheck(const std::string& model_file,
   return true;
 }
 
+Frontend GuessModelFormat(const std::string& model_file) {
+  if (model_file.size() > 8 &&
+      model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
+    FDLogger() << "Model Format: PaddlePaddle." << std::endl;
+    return Frontend::PADDLE;
+  } else if (model_file.size() > 5 &&
+             model_file.substr(model_file.size() - 5, 5) == ".onnx") {
+    FDLogger() << "Model Format: ONNX." << std::endl;
+    return Frontend::ONNX;
+  }
+
+  FDERROR << "Cannot guess which model format you are using, please set "
+             "RuntimeOption::model_format manually."
+          << std::endl;
+  return Frontend::PADDLE;
+}
+
 bool Runtime::Init(const RuntimeOption& _option) {
   option = _option;
+  if (option.model_format == Frontend::AUTOREC) {
+    option.model_format = GuessModelFormat(_option.model_file);
+  }
   if (option.backend == Backend::UNKNOWN) {
     if (IsBackendAvailable(Backend::ORT)) {
       option.backend = Backend::ORT;
@@ -124,6 +144,9 @@ bool Runtime::Init(const RuntimeOption& _option) {
   } else if (option.backend == Backend::PDINFER) {
     FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
              "Backend::TRT only supports Device::CPU/Device::GPU.");
+    FDASSERT(
+        option.model_format == Frontend::PADDLE,
+        "Backend::PDINFER only supports model format of Frontend::PADDLE.");
     CreatePaddleBackend();
   } else {
     FDERROR << "Runtime only support "
@@ -163,8 +186,8 @@ void Runtime::CreatePaddleBackend() {
            "Load model from Paddle failed while initliazing PaddleBackend.");
 #else
   FDASSERT(false,
-           "OrtBackend is not available, please compiled with "
-           "ENABLE_ORT_BACKEND=ON.");
+           "PaddleBackend is not available, please compile with "
+           "ENABLE_PADDLE_BACKEND=ON.");
 #endif
 }
 
diff --git a/fastdeploy/fastdeploy_runtime.h b/fastdeploy/fastdeploy_runtime.h
index eb88746321c..7ec08e9d9de 100644
--- a/fastdeploy/fastdeploy_runtime.h
+++ b/fastdeploy/fastdeploy_runtime.h
@@ -21,7 +21,9 @@ namespace fastdeploy {
 
 enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER };
-enum FASTDEPLOY_DECL Frontend { PADDLE, ONNX };
+// AUTOREC will infer the model format (Frontend)
+// from the suffix of the model file name
+enum FASTDEPLOY_DECL Frontend { AUTOREC, PADDLE, ONNX };
 
 FASTDEPLOY_DECL std::string Str(const Backend& b);
 FASTDEPLOY_DECL std::string Str(const Frontend& f);
@@ -29,8 +31,9 @@ FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
 
 FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
 
-bool ModelFormatCheck(const std::string& model_file,
+bool CheckModelFormat(const std::string& model_file,
                       const Frontend& model_format);
+Frontend GuessModelFormat(const std::string& model_file);
 
 struct FASTDEPLOY_DECL RuntimeOption {
   Backend backend = Backend::UNKNOWN;
@@ -71,7 +74,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
 
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
-  Frontend model_format = Frontend::PADDLE;  // format of input model
+  Frontend model_format = Frontend::AUTOREC;  // format of input model
 };
 
struct FASTDEPLOY_DECL Runtime { diff --git a/setup.py b/setup.py index e76f057b1c0..19c47ed9cc8 100644 --- a/setup.py +++ b/setup.py @@ -126,6 +126,15 @@ def finalize_options(self): pass +def GetAllFiles(dirname): + files = list() + for root, dirs, filenames in os.walk(dirname): + for f in filenames: + fullname = os.path.join(root, f) + files.append(fullname) + return files + + class create_version(ONNXCommand): def run(self): with open(os.path.join(SRC_DIR, 'version.py'), 'w') as f: @@ -326,50 +335,49 @@ def run(self): shutil.copy("LICENSE", "fastdeploy") depend_libs = list() - if platform.system().lower() == "linux": - for f in os.listdir(".setuptools-cmake-build"): - full_name = os.path.join(".setuptools-cmake-build", f) - if not os.path.isfile(full_name): - continue - if not full_name.count("fastdeploy_main.cpython-"): - continue - if not full_name.endswith(".so"): - continue - # modify the search path of libraries - command = "patchelf --set-rpath '$ORIGIN/libs/' {}".format( - full_name) - # The sw_64 not suppot patchelf, so we just disable that. - if platform.machine() != 'sw_64' and platform.machine( - ) != 'mips64': - assert os.system( - command - ) == 0, "patch fastdeploy_main.cpython-36m-x86_64-linux-gnu.so failed, the command: {}".format( - command) - + # copy fastdeploy library + pybind_so_file = None for f in os.listdir(".setuptools-cmake-build"): if not os.path.isfile(os.path.join(".setuptools-cmake-build", f)): continue - if f.count("libfastdeploy") > 0: + if f.count("fastdeploy") > 0: shutil.copy( os.path.join(".setuptools-cmake-build", f), "fastdeploy/libs") - for dirname in os.listdir(".setuptools-cmake-build/third_libs/install"): - for lib in os.listdir( - os.path.join(".setuptools-cmake-build/third_libs/install", - dirname, "lib")): - if lib.count(".so") == 0 and lib.count( - ".dylib") == 0 and lib.count(".a") == 0: - continue - if not os.path.isfile( - os.path.join(".setuptools-cmake-build/third_libs/install", - dirname, "lib", lib)): - continue - shutil.copy( - os.path.join(".setuptools-cmake-build/third_libs/install", - dirname, "lib", lib), "fastdeploy/libs") + if f.count("fastdeploy_main.cpython-"): + pybind_so_file = f - all_libs = os.listdir("fastdeploy/libs") - for lib in all_libs: - package_data[PACKAGE_NAME].append(os.path.join("libs", lib)) + if not os.path.exists(".setuptools-cmake-build/third_libs/install"): + raise Exception( + "Cannot find directory third_libs/install in .setuptools-cmake-build." + ) + + if os.path.exists("fastdeploy/libs/third_libs"): + shutil.rmtree("fastdeploy/libs/third_libs") + shutil.copytree( + ".setuptools-cmake-build/third_libs/install", + "fastdeploy/libs/third_libs", + symlinks=True) + + all_files = GetAllFiles("fastdeploy/libs") + for f in all_files: + package_data[PACKAGE_NAME].append(os.path.relpath(f, "fastdeploy")) + + if platform.system().lower() == "linux": + rpaths = ["${ORIGIN}"] + for root, dirs, files in os.walk("fastdeploy/libs/third_libs"): + for d in dirs: + if d == "lib": + path = os.path.relpath( + os.path.join(root, d), "fastdeploy/libs") + rpaths.append("${ORIGIN}/" + format(path)) + rpaths = ":".join(rpaths) + command = "patchelf --set-rpath '{}' ".format(rpaths) + os.path.join( + "fastdeploy/libs", pybind_so_file) + # The sw_64 not suppot patchelf, so we just disable that. 
+ if platform.machine() != 'sw_64' and platform.machine() != 'mips64': + assert os.system( + command) == 0, "patchelf {} failed, the command: {}".format( + command, pybind_so_file) setuptools.setup( name=PACKAGE_NAME, @@ -382,9 +390,9 @@ def run(self): include_package_data=True, setup_requires=setup_requires, extras_require=extras_require, - author='paddle-infer', - author_email='paddle-infer@baidu.com', - url='https://github.com/PaddlePaddle/Paddle2ONNX.git', + author='fastdeploy', + author_email='fastdeploy@baidu.com', + url='https://github.com/PaddlePaddle/FastDeploy.git', install_requires=REQUIRED_PACKAGES, classifiers=[ "Programming Language :: Python :: 3", From 900a8fe15fb81a030b383aeb130cfadab773282f Mon Sep 17 00:00:00 2001 From: jiangjiajun Date: Sun, 24 Jul 2022 11:08:38 +0000 Subject: [PATCH 2/6] Polish cmake files and runtime apis --- CMakeLists.txt | 111 +++++++++++--------- FastDeploy.cmake.in | 21 ++-- copy_directory.py | 32 ++++++ fastdeploy/backends/ort/ort_backend.cc | 8 +- fastdeploy/backends/tensorrt/trt_backend.cc | 111 +++++++++++++------- fastdeploy/backends/tensorrt/trt_backend.h | 1 - fastdeploy/fastdeploy_model.cc | 6 +- fastdeploy/fastdeploy_runtime.cc | 99 +++++++++++++++-- fastdeploy/fastdeploy_runtime.h | 45 +++++++- fastdeploy/fastdeploy_runtime.py | 24 +++-- fastdeploy/pybind/fastdeploy_runtime.cc | 82 +++++++++------ fastdeploy/utils/utils.cc | 5 +- fastdeploy/utils/utils.h | 17 ++- fastdeploy/vision/common/processors/cast.cc | 14 ++- model_zoo/vision/ppyoloe/cpp/CMakeLists.txt | 2 +- model_zoo/vision/ppyoloe/cpp/ppyoloe.cc | 3 +- model_zoo/vision/yolov5/cpp/CMakeLists.txt | 3 +- model_zoo/vision/yolov5/cpp/yolov5.cc | 6 +- model_zoo/vision/yolox/cpp/CMakeLists.txt | 2 +- model_zoo/vision/yolox/cpp/yolox.cc | 6 +- model_zoo/vision/yolox/yolox.py | 7 +- setup.py | 4 +- 22 files changed, 426 insertions(+), 183 deletions(-) create mode 100644 copy_directory.py diff --git a/CMakeLists.txt b/CMakeLists.txt index fa2d421a191..141c2d1c579 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,8 +15,20 @@ PROJECT(fastdeploy C CXX) CMAKE_MINIMUM_REQUIRED (VERSION 3.16) +option(CSRCS_DIR_NAME "Name of source code directory") +option(LIBRARY_NAME "Name of build library name") +option(PY_LIBRARY_NAME "Name of build python library name") +if(NOT CSRCS_DIR_NAME) + set(CSRCS_DIR_NAME "./") +endif() +if(NOT LIBRARY_NAME) + set(LIBRARY_NAME "fastdeploy") +endif() +if(NOT PY_LIBRARY_NAME) + set(PY_LIBRARY_NAME "fastdeploy_main") +endif() include(ExternalProject) -add_subdirectory(fastdeploy) +add_subdirectory(${CSRCS_DIR_NAME}/fastdeploy) include(external/utils.cmake) # Set C++11 as standard for the whole project @@ -51,7 +63,8 @@ endif() option(BUILD_FASTDEPLOY_PYTHON "if build python lib for fastdeploy." 
OFF) -include_directories(${PROJECT_SOURCE_DIR}) +set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}") +include_directories(${HEAD_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) @@ -62,12 +75,12 @@ if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) endif() add_definitions(-DFASTDEPLOY_LIB) -file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/*.cc) -file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/ort/*.cc) -file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/paddle/*.cc) -file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cpp) -file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/vision/*.cc) -file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/*_pybind.cc) +file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc) +file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc) +file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc) +file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp) +file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc) +file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc) list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS}) set(DEPEND_LIBS "") @@ -117,7 +130,7 @@ if(ENABLE_TRT_BACKEND) endif() add_definitions(-DENABLE_TRT_BACKEND) include_directories(${TRT_DIRECTORY}/include) - include_directories(${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/common) + include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/common) list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS}) find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib) find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib) @@ -125,12 +138,16 @@ if(ENABLE_TRT_BACKEND) find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib) list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB}) - # copy tensorrt libraries to third lib -# if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") -# file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") -# endif() -# file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") -# file(COPY ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB} DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib" FOLLOW_SYMLINK_CHAIN) + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt") + endif() + if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib") + endif() + find_package(Python COMPONENTS Interpreter Development REQUIRED) + message(STATUS "Copying ${TRT_DIRECTORY}/lib to 
${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...") + execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib) + endif() if(ENABLE_VISION) @@ -157,37 +174,37 @@ else() endif() endif() -configure_file(${PROJECT_SOURCE_DIR}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/fastdeploy/core/config.h) +configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h) configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY) list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_PYBIND_SRCS}) -add_library(fastdeploy SHARED ${ALL_DEPLOY_SRCS}) -redefine_file_macro(fastdeploy) -set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") +add_library(${LIBRARY_NAME} SHARED ${ALL_DEPLOY_SRCS}) +redefine_file_macro(${LIBRARY_NAME}) +set_target_properties(${LIBRARY_NAME} PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") if(NOT APPLE) - set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-Wl,--start-group,--exclude-libs,ALL") + set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS "-Wl,--start-group,--exclude-libs,ALL") endif() -set_target_properties(fastdeploy PROPERTIES LINK_FLAGS_RELEASE -s) +set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS_RELEASE -s) file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION) string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION) if (APPLE) # set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") - set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") + set_target_properties(${LIBRARY_NAME} PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") elseif(MSVC) else() - set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") - set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL") - set_target_properties(fastdeploy PROPERTIES LINK_FLAGS_RELEASE -s) + set_target_properties(${LIBRARY_NAME} PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") + set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL") + set_target_properties(${LIBRARY_NAME} PROPERTIES LINK_FLAGS_RELEASE -s) endif() find_package(OpenMP) if(OpenMP_CXX_FOUND) list(APPEND DEPEND_LIBS OpenMP::OpenMP_CXX) endif() -set_target_properties(fastdeploy PROPERTIES VERSION ${FASTDEPLOY_VERSION}) -target_link_libraries(fastdeploy ${DEPEND_LIBS}) +set_target_properties(${LIBRARY_NAME} PROPERTIES VERSION ${FASTDEPLOY_VERSION}) +target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS}) # add examples after prepare include paths for third-parties if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) @@ -200,15 +217,15 @@ include(external/summary.cmake) fastdeploy_summary() install( - TARGETS fastdeploy + TARGETS ${LIBRARY_NAME} LIBRARY DESTINATION lib ) install( - DIRECTORY ${PROJECT_SOURCE_DIR}/fastdeploy + DIRECTORY ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy DESTINATION ${CMAKE_INSTALL_PREFIX}/include FILES_MATCHING PATTERN "*.h" - PATTERN "${PROJECT_SOURCE_DIR}/fastdeploy/backends/*/*.h" + PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/*/*.h" ) install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install @@ -243,40 +260,34 @@ if(BUILD_FASTDEPLOY_PYTHON) endif() if(NOT ENABLE_VISION) - file(GLOB_RECURSE VISION_PYBIND_SRCS 
${PROJECT_SOURCE_DIR}/fastdeploy/vision/*_pybind.cc)
+    file(GLOB_RECURSE VISION_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*_pybind.cc)
     list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${VISION_PYBIND_SRCS})
   endif()
 
-  add_library(fastdeploy_main MODULE ${DEPLOY_PYBIND_SRCS})
-  redefine_file_macro(fastdeploy_main)
-  set_target_properties(fastdeploy_main PROPERTIES PREFIX "")
-  set_target_properties(fastdeploy_main
+  add_library(${PY_LIBRARY_NAME} MODULE ${DEPLOY_PYBIND_SRCS})
+  redefine_file_macro(${PY_LIBRARY_NAME})
+  set_target_properties(${PY_LIBRARY_NAME} PROPERTIES PREFIX "")
+  set_target_properties(${PY_LIBRARY_NAME}
                         PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
-  set_target_properties(fastdeploy_main PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
-  set_target_properties(fastdeploy_main
+  set_target_properties(${PY_LIBRARY_NAME} PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
+  set_target_properties(${PY_LIBRARY_NAME}
                         PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
-  target_include_directories(fastdeploy_main PRIVATE
+  target_include_directories(${PY_LIBRARY_NAME} PRIVATE
                              $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
                              $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
                              ${PYTHON_INCLUDE_DIR})
-  target_include_directories(fastdeploy_main PUBLIC ${PROJECT_SOURCE_DIR}/third_party/pybind11/include)
+  target_include_directories(${PY_LIBRARY_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/third_party/pybind11/include)
 
   if(APPLE)
-    set_target_properties(fastdeploy_main
+    set_target_properties(${PY_LIBRARY_NAME}
                           PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
   endif()
 
-  if(APPLE)
-    target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
-  elseif(WIN32)
-    target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
-  else()
-    target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
-  endif()
+  target_link_libraries(${PY_LIBRARY_NAME} PUBLIC ${LIBRARY_NAME})
 
   if(MSVC)
-    target_link_libraries(fastdeploy_main PRIVATE ${PYTHON_LIBRARIES})
-    target_compile_options(fastdeploy_main
+    target_link_libraries(${PY_LIBRARY_NAME} PRIVATE ${PYTHON_LIBRARIES})
+    target_compile_options(${PY_LIBRARY_NAME}
                            PRIVATE /MP
                            /wd4244 # 'argument': conversion from 'google::
                                    # protobuf::uint64' to 'int', possible
                                    # possible loss of data
                            /wd4996 # The second parameter is ignored.
                           ${EXTRA_FLAGS})
-    target_compile_options(fastdeploy_main PRIVATE $<$<NOT:$<CONFIG:Debug>>:/MT> $<$<CONFIG:Debug>:/MTd>)
+    target_compile_options(${PY_LIBRARY_NAME} PRIVATE $<$<NOT:$<CONFIG:Debug>>:/MT> $<$<CONFIG:Debug>:/MTd>)
   endif()
 endif(BUILD_FASTDEPLOY_PYTHON)
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index e8c0bb3d593..ccf2a574b3b 100644
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -8,6 +8,7 @@ set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
 set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
 set(ENABLE_VISION @ENABLE_VISION@)
 set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@)
+set(LIBRARY_NAME @LIBRARY_NAME@)
 
 set(FASTDEPLOY_LIBS "")
 set(FASTDEPLOY_INCS "")
@@ -17,7 +18,7 @@ if(NOT CMAKE_CXX_STANDARD)
   set(CMAKE_CXX_STANDARD 11)
 endif()
 
-find_library(FDLIB fastdeploy ${CMAKE_CURRENT_LIST_DIR}/lib)
+find_library(FDLIB ${LIBRARY_NAME} ${CMAKE_CURRENT_LIST_DIR}/lib)
 list(APPEND FASTDEPLOY_LIBS ${FDLIB})
 
 if(ENABLE_ORT_BACKEND)
@@ -51,13 +52,17 @@ if(WITH_GPU)
   list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB})
 
   if (ENABLE_TRT_BACKEND)
-    if (NOT TRT_DIRECTORY)
-      message(FATAL_ERROR "[FastDeploy] Please define TRT_DIRECTORY, e.g -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0")
-    endif()
-    find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
-    find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
-    find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
-    find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
+#    if (NOT TRT_DIRECTORY)
+#      message(FATAL_ERROR "[FastDeploy] Please define TRT_DIRECTORY, e.g -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0")
+#    endif()
+#    find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
+#    find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
+#    find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
+#    find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
+    find_library(TRT_INFER_LIB nvinfer ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
+    find_library(TRT_ONNX_LIB nvonnxparser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
+    find_library(TRT_CAFFE_LIB nvcaffe_parser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
+    find_library(TRT_PLUGIN_LIB nvinfer_plugin ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
     list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
   endif()
 endif()
diff --git a/copy_directory.py b/copy_directory.py
new file mode 100644
index 00000000000..f0313db3c2b
--- /dev/null
+++ b/copy_directory.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import shutil +import os +import sys + + +def copy_directory(src, dst): + if os.path.exists(dst): + raise Exception("Destination {} is already exist.".format(dst)) + if not os.path.exists(src): + raise Exception("Source {} is not exist.".format(src)) + try: + shutil.copytree(src, dst, symlinks=True) + except: + raise Exception("Copy {} to {} failed.".format(src, dst)) + + +if __name__ == "__main__": + copy_directory(sys.argv[1], sys.argv[2]) diff --git a/fastdeploy/backends/ort/ort_backend.cc b/fastdeploy/backends/ort/ort_backend.cc index 909b5f28752..f5d0bfdd98e 100644 --- a/fastdeploy/backends/ort/ort_backend.cc +++ b/fastdeploy/backends/ort/ort_backend.cc @@ -81,10 +81,10 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) { } } if (!support_cuda) { - FDLogger() << "[WARN] Compiled fastdeploy with onnxruntime doesn't " - "support GPU, the available providers are " - << providers_msg << "will fallback to CPUExecutionProvider." - << std::endl; + FDWARNING << "Compiled fastdeploy with onnxruntime doesn't " + "support GPU, the available providers are " + << providers_msg << "will fallback to CPUExecutionProvider." + << std::endl; option_.use_gpu = false; } else { FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " + diff --git a/fastdeploy/backends/tensorrt/trt_backend.cc b/fastdeploy/backends/tensorrt/trt_backend.cc index a29af6e9a37..d050cc9f228 100644 --- a/fastdeploy/backends/tensorrt/trt_backend.cc +++ b/fastdeploy/backends/tensorrt/trt_backend.cc @@ -52,6 +52,61 @@ std::vector toVec(const nvinfer1::Dims& dim) { return out; } +bool CheckDynamicShapeConfig(const paddle2onnx::OnnxReader& reader, + const TrtBackendOption& option) { + paddle2onnx::ModelTensorInfo inputs[reader.NumInputs()]; + std::string input_shapes[reader.NumInputs()]; + for (int i = 0; i < reader.NumInputs(); ++i) { + reader.GetInputInfo(i, &inputs[i]); + + // change 0 to -1, when input_dim is a string, onnx will make it to zero + for (int j = 0; j < inputs[i].rank; ++j) { + if (inputs[i].shape[j] <= 0) { + inputs[i].shape[j] = -1; + } + } + + input_shapes[i] = ""; + for (int j = 0; j < inputs[i].rank; ++j) { + if (j != inputs[i].rank - 1) { + input_shapes[i] += (std::to_string(inputs[i].shape[j]) + ", "); + } else { + input_shapes[i] += std::to_string(inputs[i].shape[j]); + } + } + } + + bool all_check_passed = true; + for (int i = 0; i < reader.NumInputs(); ++i) { + bool contain_unknown_dim = false; + for (int j = 0; j < inputs[i].rank; ++j) { + if (inputs[i].shape[j] < 0) { + contain_unknown_dim = true; + } + } + + std::string name(inputs[i].name, strlen(inputs[i].name)); + FDINFO << "The loaded model's input tensor:" << name + << " has shape [" + input_shapes[i] << "]." << std::endl; + if (contain_unknown_dim) { + auto iter1 = option.min_shape.find(name); + auto iter2 = option.max_shape.find(name); + auto iter3 = option.opt_shape.find(name); + if (iter1 == option.min_shape.end() || iter2 == option.max_shape.end() || + iter3 == option.opt_shape.end()) { + FDERROR << "The loaded model's input tensor:" << name + << " has dynamic shape [" + input_shapes[i] + + "], but didn't configure it's shape for tensorrt with " + "SetTrtInputShape correctly." 
+ << std::endl; + all_check_passed = false; + } + } + } + + return all_check_passed; +} + bool TrtBackend::InitFromTrt(const std::string& trt_engine_file, const TrtBackendOption& option) { if (initialized_) { @@ -167,13 +222,17 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file, onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i])); outputs_order_[name] = i; } + if (!CheckDynamicShapeConfig(onnx_reader, option)) { + FDERROR << "TrtBackend::CheckDynamicShapeConfig failed." << std::endl; + return false; + } if (option.serialize_file != "") { std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in); if (fin) { - FDLogger() << "Detect serialized TensorRT Engine file in " - << option.serialize_file << ", will load it directly." - << std::endl; + FDINFO << "Detect serialized TensorRT Engine file in " + << option.serialize_file << ", will load it directly." + << std::endl; fin.close(); return InitFromTrt(option.serialize_file); } @@ -311,9 +370,9 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model, if (option.enable_fp16) { if (!builder->platformHasFastFp16()) { - FDLogger() << "[WARN] Detected FP16 is not supported in the current GPU, " - "will use FP32 instead." - << std::endl; + FDWARNING << "Detected FP16 is not supported in the current GPU, " + "will use FP32 instead." + << std::endl; } else { config->setFlag(nvinfer1::BuilderFlag::kFP16); } @@ -330,33 +389,13 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model, return false; } - FDLogger() << "Start to building TensorRT Engine..." << std::endl; + FDINFO << "Start to building TensorRT Engine..." << std::endl; bool fp16 = builder->platformHasFastFp16(); builder->setMaxBatchSize(option.max_batch_size); config->setMaxWorkspaceSize(option.max_workspace_size); - if (option.fixed_shape.size() > 0) { - auto profile = builder->createOptimizationProfile(); - for (auto& item : option.fixed_shape) { - FDASSERT(profile->setDimensions(item.first.c_str(), - nvinfer1::OptProfileSelector::kMIN, - sample::toDims(item.second)), - "[TrtBackend] Failed to set min_shape for input: " + item.first + - " in TrtBackend."); - FDASSERT(profile->setDimensions(item.first.c_str(), - nvinfer1::OptProfileSelector::kOPT, - sample::toDims(item.second)), - "[TrtBackend] Failed to set min_shape for input: " + item.first + - " in TrtBackend."); - FDASSERT(profile->setDimensions(item.first.c_str(), - nvinfer1::OptProfileSelector::kMAX, - sample::toDims(item.second)), - "[TrtBackend] Failed to set min_shape for input: " + item.first + - " in TrtBackend."); - } - config->addOptimizationProfile(profile); - } else if (option.max_shape.size() > 0) { + if (option.max_shape.size() > 0) { auto profile = builder->createOptimizationProfile(); FDASSERT(option.max_shape.size() == option.min_shape.size() && option.min_shape.size() == option.opt_shape.size(), @@ -416,10 +455,10 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model, return false; } - FDLogger() << "TensorRT Engine is built succussfully." << std::endl; + FDINFO << "TensorRT Engine is built succussfully." << std::endl; if (option.serialize_file != "") { - FDLogger() << "Serialize TensorRTEngine to local file " - << option.serialize_file << "." << std::endl; + FDINFO << "Serialize TensorRTEngine to local file " << option.serialize_file + << "." << std::endl; std::ofstream engine_file(option.serialize_file.c_str()); if (!engine_file) { FDERROR << "Failed to open " << option.serialize_file << " to write." 
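
Note: the min/opt/max shape maps consumed above are ultimately translated into a
TensorRT optimization profile. A minimal sketch of that translation with the raw
TensorRT C++ API; the input name "image" and all dimensions here are illustrative
values only, and the builder/config are assumed to exist already:

    #include "NvInfer.h"

    void AddProfile(nvinfer1::IBuilder* builder,
                    nvinfer1::IBuilderConfig* config) {
      nvinfer1::IOptimizationProfile* profile =
          builder->createOptimizationProfile();
      // One kMIN/kOPT/kMAX triple per dynamic input, mirroring
      // TrtBackendOption::min_shape/opt_shape/max_shape.
      profile->setDimensions("image", nvinfer1::OptProfileSelector::kMIN,
                             nvinfer1::Dims4(1, 3, 224, 224));
      profile->setDimensions("image", nvinfer1::OptProfileSelector::kOPT,
                             nvinfer1::Dims4(4, 3, 224, 224));
      profile->setDimensions("image", nvinfer1::OptProfileSelector::kMAX,
                             nvinfer1::Dims4(8, 3, 224, 224));
      config->addOptimizationProfile(profile);
    }
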
diff --git a/fastdeploy/backends/tensorrt/trt_backend.h b/fastdeploy/backends/tensorrt/trt_backend.h
index 1da7f147144..27e6e552b49 100644
--- a/fastdeploy/backends/tensorrt/trt_backend.h
+++ b/fastdeploy/backends/tensorrt/trt_backend.h
@@ -46,7 +46,6 @@ struct TrtBackendOption {
   bool enable_int8 = false;
   size_t max_batch_size = 32;
   size_t max_workspace_size = 1 << 30;
-  std::map<std::string, std::vector<int32_t>> fixed_shape;
   std::map<std::string, std::vector<int32_t>> max_shape;
   std::map<std::string, std::vector<int32_t>> min_shape;
   std::map<std::string, std::vector<int32_t>> opt_shape;
diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc
index c61eea7cb6e..e434e19fa5b 100644
--- a/fastdeploy/fastdeploy_model.cc
+++ b/fastdeploy/fastdeploy_model.cc
@@ -132,9 +132,9 @@ void FastDeployModel::EnableDebug() {
 #ifdef FASTDEPLOY_DEBUG
   debug_ = true;
 #else
-  FDLogger() << "The compile FastDeploy is not with -DENABLE_DEBUG=ON, so "
-                "cannot enable debug mode."
-             << std::endl;
+  FDWARNING << "FastDeploy is not compiled with -DENABLE_DEBUG=ON, so "
+               "debug mode cannot be enabled."
+            << std::endl;
   debug_ = false;
 #endif
 }
diff --git a/fastdeploy/fastdeploy_runtime.cc b/fastdeploy/fastdeploy_runtime.cc
index e353c641676..f88f7f5794b 100644
--- a/fastdeploy/fastdeploy_runtime.cc
+++ b/fastdeploy/fastdeploy_runtime.cc
@@ -77,23 +77,23 @@ bool CheckModelFormat(const std::string& model_file,
   if (model_format == Frontend::PADDLE) {
     if (model_file.size() < 8 ||
         model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
-      FDLogger() << "With model format of Frontend::PADDLE, the model file "
-                    "should ends with `.pdmodel`, but now it's "
-                 << model_file << std::endl;
+      FDERROR << "With model format of Frontend::PADDLE, the model file "
+                 "should end with `.pdmodel`, but now it's "
+              << model_file << std::endl;
       return false;
     }
   } else if (model_format == Frontend::ONNX) {
     if (model_file.size() < 5 ||
         model_file.substr(model_file.size() - 5, 5) != ".onnx") {
-      FDLogger() << "With model format of Frontend::ONNX, the model file "
-                    "should ends with `.onnx`, but now it's "
-                 << model_file << std::endl;
+      FDERROR << "With model format of Frontend::ONNX, the model file "
+                 "should end with `.onnx`, but now it's "
+              << model_file << std::endl;
       return false;
     }
   } else {
-    FDLogger() << "Only support model format with frontend Frontend::PADDLE / "
-                  "Frontend::ONNX."
-               << std::endl;
+    FDERROR << "Only model formats Frontend::PADDLE and "
+               "Frontend::ONNX are supported."
+            << std::endl;
     return false;
   }
   return true;
@@ -116,6 +116,86 @@ Frontend GuessModelFormat(const std::string& model_file) {
   return Frontend::PADDLE;
 }
 
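For reference, CheckModelFormat validates a file name against an explicit Frontend, while GuessModelFormat infers the Frontend from the suffix. A quick sketch (file names are placeholders):

// Sketch only: suffix-based validation and detection (placeholder paths).
bool ok = fastdeploy::CheckModelFormat("model.pdmodel",
                                       fastdeploy::Frontend::PADDLE);  // true
auto fmt = fastdeploy::GuessModelFormat("model.onnx");  // Frontend::ONNX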
+void RuntimeOption::UseGpu(int gpu_id) {
+#ifdef WITH_GPU
+  device = Device::GPU;
+  device_id = gpu_id;
+#else
+  FDWARNING << "FastDeploy is not compiled with GPU, will fall back to CPU."
+            << std::endl;
+  device = Device::CPU;
+#endif
+}
+
+void RuntimeOption::UseCpu() { device = Device::CPU; }
+
+void RuntimeOption::SetCpuThreadNum(int thread_num) {
+  FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
+  cpu_thread_num = thread_num;
+}
+
+// use paddle inference backend
+void RuntimeOption::UsePaddleBackend() {
+#ifdef ENABLE_PADDLE_BACKEND
+  backend = Backend::PDINFER;
+#else
+  FDASSERT(false, "FastDeploy is not compiled with Paddle Inference.");
+#endif
+}
+
+// use onnxruntime backend
+void RuntimeOption::UseOrtBackend() {
+#ifdef ENABLE_ORT_BACKEND
+  backend = Backend::ORT;
+#else
+  FDASSERT(false, "FastDeploy is not compiled with OrtBackend.");
+#endif
+}
+
+void RuntimeOption::UseTrtBackend() {
+#ifdef ENABLE_TRT_BACKEND
+  backend = Backend::TRT;
+#else
+  FDASSERT(false, "FastDeploy is not compiled with TrtBackend.");
+#endif
+}
+
+void RuntimeOption::EnablePaddleMKLDNN() { pd_enable_mkldnn = true; }
+
+void RuntimeOption::DisablePaddleMKLDNN() { pd_enable_mkldnn = false; }
+
+void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
+  FDASSERT(size > 0, "Parameter size must be greater than 0.");
+  pd_mkldnn_cache_size = size;
+}
+
+void RuntimeOption::SetTrtInputShape(const std::string& input_name,
+                                     const std::vector<int32_t>& min_shape,
+                                     const std::vector<int32_t>& opt_shape,
+                                     const std::vector<int32_t>& max_shape) {
+  trt_min_shape[input_name].clear();
+  trt_max_shape[input_name].clear();
+  trt_opt_shape[input_name].clear();
+  trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  if (opt_shape.size() == 0) {
+    trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  } else {
+    trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
+  }
+  if (max_shape.size() == 0) {
+    trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  } else {
+    trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
+  }
+  FDINFO << trt_min_shape[input_name].size() << " "
+         << trt_opt_shape[input_name].size() << " "
+         << trt_max_shape[input_name].size() << std::endl;
+}
+
+void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
+
+void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }
+
 bool Runtime::Init(const RuntimeOption& _option) {
   option = _option;
   if (option.model_format == Frontend::AUTOREC) {
@@ -229,7 +309,6 @@ void Runtime::CreateTrtBackend() {
   trt_option.enable_int8 = option.trt_enable_int8;
   trt_option.max_batch_size = option.trt_max_batch_size;
   trt_option.max_workspace_size = option.trt_max_workspace_size;
-  trt_option.fixed_shape = option.trt_fixed_shape;
   trt_option.max_shape = option.trt_max_shape;
   trt_option.min_shape = option.trt_min_shape;
   trt_option.opt_shape = option.trt_opt_shape;
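Taken together, these setters replace direct field assignment on RuntimeOption with a small method-based API. A usage sketch built only from the methods added above (the input name and shapes are placeholders):

// Sketch only: configure a GPU + TensorRT run through the new setters.
fastdeploy::RuntimeOption option;
option.UseGpu(0);        // falls back to CPU in builds without WITH_GPU
option.UseTrtBackend();  // asserts when TensorRT support is not compiled in
option.SetTrtInputShape("images", {1, 3, 320, 320}, {1, 3, 640, 640},
                        {1, 3, 1280, 1280});
option.EnableTrtFP16();  // silently falls back to FP32 on GPUs without fast FP16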
diff --git a/fastdeploy/fastdeploy_runtime.h b/fastdeploy/fastdeploy_runtime.h
index 7ec08e9d9de..2252cbc5850 100644
--- a/fastdeploy/fastdeploy_runtime.h
+++ b/fastdeploy/fastdeploy_runtime.h
@@ -36,8 +36,50 @@ bool CheckModelFormat(const std::string& model_file,
 Frontend GuessModelFormat(const std::string& model_file);
 
 struct FASTDEPLOY_DECL RuntimeOption {
-  Backend backend = Backend::UNKNOWN;
+  // set model inference in CPU
+  void UseCpu();
+
+  // set model inference in GPU
+  void UseGpu(int gpu_id = 0);
+
+  // set the number of threads used for CPU inference
+  void SetCpuThreadNum(int thread_num);
+
+  // use paddle inference backend
+  void UsePaddleBackend();
+
+  // use onnxruntime backend
+  void UseOrtBackend();
+
+  // use tensorrt backend
+  void UseTrtBackend();
+  // enable mkldnn while using paddle inference backend on CPU
+  void EnablePaddleMKLDNN();
+  // disable mkldnn while using paddle inference backend on CPU
+  void DisablePaddleMKLDNN();
+
+  // set the shape cache size while mkldnn is enabled with paddle inference backend
+  void SetPaddleMKLDNNCacheSize(int size);
+
+  // set tensorrt input shapes while the model inputs contain dynamic shape
+  // min_shape: the minimum shape
+  // opt_shape: the most common shape during inference, defaults to empty
+  // max_shape: the maximum shape, defaults to empty
+
+  // if opt_shape or max_shape is empty, it is set to min_shape, which
+  // fixes the input shape to min_shape during inference
+  void SetTrtInputShape(
+      const std::string& input_name, const std::vector<int32_t>& min_shape,
+      const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
+      const std::vector<int32_t>& max_shape = std::vector<int32_t>());
+
+  // enable half precision while using tensorrt backend
+  void EnableTrtFP16();
+  // disable half precision, change to full precision (float32)
+  void DisableTrtFP16();
+
+  Backend backend = Backend::UNKNOWN;
   // for cpu inference and preprocess
   int cpu_thread_num = 8;
   int device_id = 0;
@@ -62,7 +104,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
   int pd_mkldnn_cache_size = 1;
 
   // ======Only for Trt Backend=======
-  std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
  std::map<std::string, std::vector<int32_t>> trt_max_shape;
  std::map<std::string, std::vector<int32_t>> trt_min_shape;
  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
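As the comments above state, leaving opt_shape and max_shape empty pins the input to min_shape. A sketch of the two calling styles (the tensor name "x" is a placeholder):

// Sketch only: fixed shape vs. a dynamic range with SetTrtInputShape.
option.SetTrtInputShape("x", {1, 3, 224, 224});  // fixed to 1x3x224x224
option.SetTrtInputShape("x", {1, 3, 224, 224},   // batch may range from 1 to 16,
                        {8, 3, 224, 224},        // tuned for batch 8
                        {16, 3, 224, 224});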
diff --git a/fastdeploy/fastdeploy_runtime.py b/fastdeploy/fastdeploy_runtime.py
index 592d1d29529..b23879b36da 100644
--- a/fastdeploy/fastdeploy_runtime.py
+++ b/fastdeploy/fastdeploy_runtime.py
@@ -55,27 +55,33 @@ def initialized(self):
         return self._model.initialized()
 
 
-class FastDeployRuntime:
+class Runtime:
     def __init__(self, runtime_option):
-        self._runtime = C.Runtime();
-        assert self._runtime.init(runtime_option), "Initialize FastDeployRuntime Failed!"
+        self._runtime = C.Runtime()
+        assert self._runtime.init(runtime_option), "Initialize Runtime Failed!"
 
     def infer(self, data):
         assert isinstance(data, dict), "The input data should be type of dict."
         return self._runtime.infer(data)
 
     def num_inputs(self):
-        return self._runtime.num_inputs();
+        return self._runtime.num_inputs()
 
     def num_outputs(self):
-        return self._runtime.num_outputs();
+        return self._runtime.num_outputs()
 
     def get_input_info(self, index):
-        assert isinstance(index, int), "The input parameter index should be type of int."
-        assert index < self.num_inputs(), "The input parameter index:{} should less than number of inputs:{}.".format(index, self.num_inputs)
+        assert isinstance(
+            index, int), "The input parameter index should be type of int."
+        assert index < self.num_inputs(
+        ), "The input parameter index:{} should be less than the number of inputs:{}.".format(
+            index, self.num_inputs())
         return self._runtime.get_input_info(index)
 
     def get_output_info(self, index):
-        assert isinstance(index, int), "The input parameter index should be type of int."
-        assert index < self.num_outputs(), "The input parameter index:{} should less than number of outputs:{}.".format(index, self.num_outputs)
+        assert isinstance(
+            index, int), "The input parameter index should be type of int."
+        assert index < self.num_outputs(
+        ), "The input parameter index:{} should be less than the number of outputs:{}.".format(
+            index, self.num_outputs())
         return self._runtime.get_output_info(index)
diff --git a/fastdeploy/pybind/fastdeploy_runtime.cc b/fastdeploy/pybind/fastdeploy_runtime.cc
index e3c6dd19ae2..3255af0d926 100644
--- a/fastdeploy/pybind/fastdeploy_runtime.cc
+++ b/fastdeploy/pybind/fastdeploy_runtime.cc
@@ -19,6 +19,19 @@ namespace fastdeploy {
 
 void BindRuntime(pybind11::module& m) {
   pybind11::class_<RuntimeOption>(m, "RuntimeOption")
       .def(pybind11::init())
+      .def("use_gpu", &RuntimeOption::UseGpu)
+      .def("use_cpu", &RuntimeOption::UseCpu)
+      .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
+      .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
+      .def("use_ort_backend", &RuntimeOption::UseOrtBackend)
+      .def("use_trt_backend", &RuntimeOption::UseTrtBackend)
+      .def("enable_paddle_mkldnn", &RuntimeOption::EnablePaddleMKLDNN)
+      .def("disable_paddle_mkldnn", &RuntimeOption::DisablePaddleMKLDNN)
+      .def("set_paddle_mkldnn_cache_size",
+           &RuntimeOption::SetPaddleMKLDNNCacheSize)
+      .def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
+      .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
+      .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
       .def_readwrite("model_file", &RuntimeOption::model_file)
       .def_readwrite("params_file", &RuntimeOption::params_file)
       .def_readwrite("model_format", &RuntimeOption::model_format)
@@ -30,7 +43,6 @@ void BindRuntime(pybind11::module& m) {
       .def_readwrite("ort_inter_op_num_threads",
                      &RuntimeOption::ort_inter_op_num_threads)
       .def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
-      .def_readwrite("trt_fixed_shape", &RuntimeOption::trt_fixed_shape)
       .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
       .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
       .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
@@ -49,41 +61,43 @@ void BindRuntime(pybind11::module& m) {
   pybind11::class_<Runtime>(m, "Runtime")
       .def(pybind11::init())
       .def("init", &Runtime::Init)
-      .def("infer", [](Runtime& self,
-                       std::map<std::string, pybind11::array>& data) {
-        std::vector<FDTensor> inputs(data.size());
-        int index = 0;
-        for (auto iter = data.begin(); iter != data.end(); ++iter) {
-          inputs[index].dtype = NumpyDataTypeToFDDataType(iter->second.dtype());
-          inputs[index].shape.insert(
-              inputs[index].shape.begin(), iter->second.shape(),
-              iter->second.shape() + iter->second.ndim());
-          // TODO(jiangjiajun) Maybe skip memory copy is a better choice
-          // use SetExternalData
-          inputs[index].data.resize(iter->second.nbytes());
-          memcpy(inputs[index].data.data(), iter->second.mutable_data(),
-                 iter->second.nbytes());
-          inputs[index].name = iter->first;
-        }
-
-        std::vector<FDTensor> outputs(self.NumOutputs());
-        self.Infer(inputs, &outputs);
-
-        std::vector<pybind11::array> results;
-        results.reserve(outputs.size());
-        for (size_t i = 0; i < outputs.size(); ++i) {
-          auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
-          results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
-          memcpy(results[i].mutable_data(), outputs[i].data.data(),
-                 outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
-        }
-        return results;
-      })
-      .def("num_inputs", &Runtime::NumInputs)
-      .def("num_outputs", &Runtime::NumOutputs)
-      .def("get_input_info", &Runtime::GetInputInfo)
-      .def("get_output_info", &Runtime::GetOutputInfo)
-      .def_readonly("option", &Runtime::option);
+      .def("infer",
+           [](Runtime& self, std::map<std::string, pybind11::array>& data) {
+             std::vector<FDTensor> inputs(data.size());
+             int index = 0;
+             for (auto iter = data.begin(); iter != data.end(); ++iter) {
+               inputs[index].dtype =
+                   NumpyDataTypeToFDDataType(iter->second.dtype());
+               inputs[index].shape.insert(
+                   inputs[index].shape.begin(), iter->second.shape(),
+                   iter->second.shape() + iter->second.ndim());
+               // TODO(jiangjiajun) Maybe skip memory copy is a better choice
+               // use SetExternalData
+               inputs[index].data.resize(iter->second.nbytes());
+               memcpy(inputs[index].data.data(), iter->second.mutable_data(),
+                      iter->second.nbytes());
+               inputs[index].name = iter->first;
+             }
+
+             std::vector<FDTensor> outputs(self.NumOutputs());
+             self.Infer(inputs, &outputs);
+
+             std::vector<pybind11::array> results;
+             results.reserve(outputs.size());
+             for (size_t i = 0; i < outputs.size(); ++i) {
+               auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
+               results.emplace_back(
+                   pybind11::array(numpy_dtype, outputs[i].shape));
+               memcpy(results[i].mutable_data(), outputs[i].data.data(),
+                      outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
+             }
+             return results;
+           })
+      .def("num_inputs", &Runtime::NumInputs)
+      .def("num_outputs", &Runtime::NumOutputs)
+      .def("get_input_info", &Runtime::GetInputInfo)
+      .def("get_output_info", &Runtime::GetOutputInfo)
+      .def_readonly("option", &Runtime::option);
 
   pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
                            "Backend for inference.")
diff --git a/fastdeploy/utils/utils.cc b/fastdeploy/utils/utils.cc
index e4e5d1472df..95e94c4fec9 100644
--- a/fastdeploy/utils/utils.cc
+++ b/fastdeploy/utils/utils.cc
@@ -16,6 +16,9 @@
 
 namespace fastdeploy {
 
+bool DISABLE_WARNING = false;
+bool DISABLE_INFO = false;
+
 FDLogger::FDLogger(bool verbose, const std::string& prefix) {
   verbose_ = verbose;
   line_ = "";
@@ -31,4 +34,4 @@ FDLogger& FDLogger::operator<<(std::ostream& (*os)(std::ostream&)) {
   return *this;
 }
 
-}  // namespace fastdeploy
+}  // namespace fastdeploy
diff --git a/fastdeploy/utils/utils.h b/fastdeploy/utils/utils.h
index b57a27f80c3..89dcb57104a 100644
--- a/fastdeploy/utils/utils.h
+++ b/fastdeploy/utils/utils.h
@@ -33,6 +33,9 @@
 
 namespace fastdeploy {
 
+extern bool DISABLE_WARNING;
+extern bool DISABLE_INFO;
+
 class FASTDEPLOY_DECL FDLogger {
  public:
   FDLogger() {
@@ -69,13 +72,17 @@ class FASTDEPLOY_DECL FDLogger {
 #define __REL_FILE__ __FILE__
 #endif
 
-#define FDERROR \
-  FDLogger(true, "[ERROR]") \
+#define FDERROR \
+  FDLogger(true, "[ERROR]") \
+      << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
+
+#define FDWARNING \
+  FDLogger(true, "[WARNING]") \
       << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
 
-#define FDERROR \
-  FDLogger(true, "[ERROR]") << __REL_FILE__ << "(" << __LINE__ \
-                            << ")::" << __FUNCTION__ << "\t"
+#define FDINFO \
+  FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \
+                           << ")::" << __FUNCTION__ << "\t"
 
 #define FDASSERT(condition, message) \
   if (!(condition)) { \
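With the second FDERROR definition renamed to FDINFO, the three macros log at distinct levels with a uniform file(line)::function prefix. A sketch of their intended use:

// Sketch only: the three logging levels defined above.
FDINFO << "TensorRT Engine is built successfully." << std::endl;
FDWARNING << "FP16 is not supported, will use FP32 instead." << std::endl;
FDERROR << "Failed to open the serialize file to write." << std::endl;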
diff --git a/fastdeploy/vision/common/processors/cast.cc b/fastdeploy/vision/common/processors/cast.cc
index 77a1b249ae3..b9a757f142c 100644
--- a/fastdeploy/vision/common/processors/cast.cc
+++ b/fastdeploy/vision/common/processors/cast.cc
@@ -29,9 +29,8 @@ bool Cast::CpuRun(Mat* mat) {
       im->convertTo(*im, CV_64FC(c));
     }
   } else {
-    FDLogger() << "[WARN] Cast not support for " << dtype_
-               << " now! will skip this operation."
-               << std::endl;
+    FDWARNING << "Cast is not supported for " << dtype_
+              << " now, will skip this operation." << std::endl;
   }
   return true;
 }
@@ -49,9 +48,8 @@ bool Cast::GpuRun(Mat* mat) {
       im->convertTo(*im, CV_64FC(c));
     }
   } else {
-    FDLogger() << "[WARN] Cast not support for " << dtype_
-               << " now! will skip this operation."
-               << std::endl;
+    FDWARNING << "Cast is not supported for " << dtype_
+              << " now, will skip this operation." << std::endl;
  }
  return true;
 }
@@ -62,5 +60,5 @@ bool Cast::Run(Mat* mat, const std::string& dtype, ProcLib lib) {
   return c(mat, lib);
 }
 
-}  // namespace vision
-}  // namespace fastdeploy
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/model_zoo/vision/ppyoloe/cpp/CMakeLists.txt b/model_zoo/vision/ppyoloe/cpp/CMakeLists.txt
index e6815665171..42ae4361065 100644
--- a/model_zoo/vision/ppyoloe/cpp/CMakeLists.txt
+++ b/model_zoo/vision/ppyoloe/cpp/CMakeLists.txt
@@ -5,7 +5,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
 # add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
 
 # Path to the downloaded and extracted fastdeploy library
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
+set(FASTDEPLOY_INSTALL_DIR /fastdeploy/CustomOp/FastDeploy/build1/fastdeploy-linux-x64-gpu-0.3.0)
 
 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
diff --git a/model_zoo/vision/ppyoloe/cpp/ppyoloe.cc b/model_zoo/vision/ppyoloe/cpp/ppyoloe.cc
index 30765f075d5..70162b1fc39 100644
--- a/model_zoo/vision/ppyoloe/cpp/ppyoloe.cc
+++ b/model_zoo/vision/ppyoloe/cpp/ppyoloe.cc
@@ -24,8 +24,7 @@ int main() {
   std::string vis_path = "vis.jpeg";
 
   auto option = fastdeploy::RuntimeOption();
-  option.device = fastdeploy::Device::CPU;
-  option.backend = fastdeploy::Backend::PDINFER;
+  option.backend = fastdeploy::Backend::TRT;
   auto model =
       vis::ppdet::PPYOLOE(model_file, params_file, config_file, option);
   if (!model.Initialized()) {
diff --git a/model_zoo/vision/yolov5/cpp/CMakeLists.txt b/model_zoo/vision/yolov5/cpp/CMakeLists.txt
index 13ddc9d21f4..c1f82a6fe5f 100644
--- a/model_zoo/vision/yolov5/cpp/CMakeLists.txt
+++ b/model_zoo/vision/yolov5/cpp/CMakeLists.txt
@@ -5,7 +5,8 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
 # add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
 
 # Path to the downloaded and extracted fastdeploy library
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
+set(FASTDEPLOY_INSTALL_DIR /fastdeploy/CustomOp/FastDeploy/build1/fastdeploy-linux-x64-gpu-0.3.0)
+
 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
diff --git a/model_zoo/vision/yolov5/cpp/yolov5.cc b/model_zoo/vision/yolov5/cpp/yolov5.cc
index dddcee843a7..73a3cb643d7 100644
--- a/model_zoo/vision/yolov5/cpp/yolov5.cc
+++ b/model_zoo/vision/yolov5/cpp/yolov5.cc
@@ -16,7 +16,11 @@
 int main() {
   namespace vis = fastdeploy::vision;
 
-  auto model = vis::ultralytics::YOLOv5("yolov5s.onnx");
+  auto option = fastdeploy::RuntimeOption();
+  option.UseTrtBackend();
+  option.SetTrtInputShape("images", {1, 3, 320, 320}, {1, 3, 640, 640},
+                          {1, 3, 1280, 1280});
+  auto model = vis::ultralytics::YOLOv5("yolov5s.onnx", "", option);
   if (!model.Initialized()) {
     std::cerr << "Init Failed." << std::endl;
     return -1;
diff --git a/model_zoo/vision/yolox/cpp/CMakeLists.txt b/model_zoo/vision/yolox/cpp/CMakeLists.txt
index fe9668f6a0a..67bf0f2da6a 100644
--- a/model_zoo/vision/yolox/cpp/CMakeLists.txt
+++ b/model_zoo/vision/yolox/cpp/CMakeLists.txt
@@ -5,7 +5,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
 # add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
 
 # Path to the downloaded and extracted fastdeploy library
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/)
+set(FASTDEPLOY_INSTALL_DIR /fastdeploy/CustomOp/FastDeploy/build1/fastdeploy-linux-x64-gpu-0.3.0)
 
 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
diff --git a/model_zoo/vision/yolox/cpp/yolox.cc b/model_zoo/vision/yolox/cpp/yolox.cc
index 934a50bea8e..86948e21651 100644
--- a/model_zoo/vision/yolox/cpp/yolox.cc
+++ b/model_zoo/vision/yolox/cpp/yolox.cc
@@ -16,7 +16,11 @@
 int main() {
   namespace vis = fastdeploy::vision;
 
-  auto model = vis::megvii::YOLOX("yolox_s.onnx");
+  auto option = fastdeploy::RuntimeOption();
+  option.device = fastdeploy::Device::GPU;
+  option.backend = fastdeploy::Backend::TRT;
+  option.trt_fixed_shape["images"] = {1, 3, 640, 640};
+  auto model = vis::megvii::YOLOX("yolox_s.onnx", "", option);
   if (!model.Initialized()) {
     std::cerr << "Init Failed." << std::endl;
     return -1;
diff --git a/model_zoo/vision/yolox/yolox.py b/model_zoo/vision/yolox/yolox.py
index 8fd1a8a021a..085110c1304 100644
--- a/model_zoo/vision/yolox/yolox.py
+++ b/model_zoo/vision/yolox/yolox.py
@@ -1,5 +1,5 @@
 import fastdeploy as fd
-import cv2
+import cv2 
 
 # Download the model and a test image
 model_url = "https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.onnx"
@@ -8,7 +8,9 @@
 fd.download(test_jpg_url, ".", show_progress=True)
 
 # Load the model
-model = fd.vision.megvii.YOLOX("yolox_s.onnx")
+option = fd.RuntimeOption()
+option.use_trt_backend()
+model = fd.vision.megvii.YOLOX("yolox_s.onnx", runtime_option=option)
 
 # Run prediction on the image
 im = cv2.imread("bus.jpg")
@@ -20,4 +22,3 @@
 # Print the prediction result
 print(result)
-print(model.runtime_option)
diff --git a/setup.py b/setup.py
index 19c47ed9cc8..ddef5b90d86 100644
--- a/setup.py
+++ b/setup.py
@@ -126,7 +126,7 @@ def finalize_options(self):
     pass
 
 
-def GetAllFiles(dirname):
+def get_all_files(dirname):
     files = list()
     for root, dirs, filenames in os.walk(dirname):
         for f in filenames:
@@ -358,7 +358,7 @@ def run(self):
             "fastdeploy/libs/third_libs",
             symlinks=True)
 
-        all_files = GetAllFiles("fastdeploy/libs")
+        all_files = get_all_files("fastdeploy/libs")
         for f in all_files:
             package_data[PACKAGE_NAME].append(os.path.relpath(f, "fastdeploy"))
 
From f6d66fbeee91b2a49f7f43e4dad9d5c7fa1a7331 Mon Sep 17 00:00:00 2001
From: jiangjiajun
Date: Sun, 24 Jul 2022 12:25:39 +0000
Subject: [PATCH 3/6] Remove copy libraries

---
 setup.py | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/setup.py b/setup.py
index ddef5b90d86..06e9b12be65 100644
--- a/setup.py
+++ b/setup.py
@@ -353,23 +353,22 @@ def run(self):
 
         if os.path.exists("fastdeploy/libs/third_libs"):
             shutil.rmtree("fastdeploy/libs/third_libs")
-        shutil.copytree(
-            ".setuptools-cmake-build/third_libs/install",
-            "fastdeploy/libs/third_libs",
-            symlinks=True)
-
-        all_files = get_all_files("fastdeploy/libs")
-        for f in all_files:
-            package_data[PACKAGE_NAME].append(os.path.relpath(f, "fastdeploy"))
+#        shutil.copytree(
+#            ".setuptools-cmake-build/third_libs/install",
+#            "fastdeploy/libs/third_libs",
+#            symlinks=True)
 
         if platform.system().lower() == "linux":
             rpaths = ["${ORIGIN}"]
-            for root, dirs, files in os.walk("fastdeploy/libs/third_libs"):
+            for root, dirs, files in os.walk(
+                    ".setuptools-cmake-build/third_libs/install"):
                 for d in dirs:
                     if d == "lib":
                         path = os.path.relpath(
-                            os.path.join(root, d), "fastdeploy/libs")
-                        rpaths.append("${ORIGIN}/" + format(path))
+                            os.path.join(root, d),
+                            ".setuptools-cmake-build/third_libs/install")
+                        rpaths.append("${ORIGIN}/" + os.path.join(
+                            "libs/third_libs", path))
             rpaths = ":".join(rpaths)
             command = "patchelf --set-rpath '{}' ".format(rpaths) + os.path.join(
                 "fastdeploy/libs", pybind_so_file)
@@ -379,6 +378,12 @@ def run(self):
                 command) == 0, "patchelf {} failed, the command: {}".format(
                     command, pybind_so_file)
 
+        all_files = get_all_files("fastdeploy/libs")
+        for f in all_files:
+            if f.count("third_libs") > 0:
+                continue
+            package_data[PACKAGE_NAME].append(os.path.relpath(f, "fastdeploy"))
+
 setuptools.setup(
     name=PACKAGE_NAME,
     version=VersionInfo.version,
From a218986b0ac02af94391fa171bc819b6bfdfc427 Mon Sep 17 00:00:00 2001
From: jiangjiajun
Date: Sun, 24 Jul 2022 12:41:10 +0000
Subject: [PATCH 4/6] fix some issue

---
 fastdeploy/fastdeploy_runtime.cc        | 15 +++++++++++++++
 fastdeploy/fastdeploy_runtime.h         |  8 ++++++++
 fastdeploy/pybind/fastdeploy_runtime.cc |  1 +
 fastdeploy/utils/utils.cc               |  3 ---
 fastdeploy/utils/utils.h                |  3 ---
 model_zoo/vision/ppyoloe/cpp/ppyoloe.cc |  5 +----
 model_zoo/vision/yolov5/cpp/yolov5.cc   |  6 +-----
 model_zoo/vision/yolox/cpp/yolox.cc     |  6 +-----
 model_zoo/vision/yolox/yolox.py         |  4 +---
 9 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/fastdeploy/fastdeploy_runtime.cc b/fastdeploy/fastdeploy_runtime.cc
index f88f7f5794b..d1fef711f8f 100644
--- a/fastdeploy/fastdeploy_runtime.cc
+++ b/fastdeploy/fastdeploy_runtime.cc
@@ -116,6 +116,21 @@ Frontend GuessModelFormat(const std::string& model_file) {
   return Frontend::PADDLE;
 }
 
+void RuntimeOption::SetModelPath(const std::string& model_path,
+                                 const std::string& params_path,
+                                 const std::string& _model_format) {
+  if (_model_format == "paddle") {
+    model_file = model_path;
+    params_file = params_path;
+    model_format = _model_format;
+  } else if (_model_format == "onnx") {
+    model_file = model_path;
+    model_format = _model_format;
+  } else {
+    FDASSERT << "The model format only can be 'paddle' or 'onnx'." << std::endl;
+  }
+}
+
 void RuntimeOption::UseGpu(int gpu_id) {
 #ifdef WITH_GPU
   device = Device::GPU;
   device_id = gpu_id;
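A sketch of the intended call pattern for the new SetModelPath helper (paths are placeholders; the format string accepts 'paddle' or 'onnx'):

// Sketch only: register model files through SetModelPath (placeholder paths).
fastdeploy::RuntimeOption option;
option.SetModelPath("model.pdmodel", "model.pdiparams");  // 'paddle' is the default
option.SetModelPath("yolov5s.onnx", "", "onnx");          // onnx needs no params file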
diff --git a/fastdeploy/fastdeploy_runtime.h b/fastdeploy/fastdeploy_runtime.h
index 2252cbc5850..d0f01069f85 100644
--- a/fastdeploy/fastdeploy_runtime.h
+++ b/fastdeploy/fastdeploy_runtime.h
@@ -36,6 +36,14 @@ bool CheckModelFormat(const std::string& model_file,
 Frontend GuessModelFormat(const std::string& model_file);
 
 struct FASTDEPLOY_DECL RuntimeOption {
+  // set the paths of the model file and the params file
+  // for an onnx model, only model_file needs to be set, while
+  // model_format must still be specified
+  // model_format supports 'paddle' / 'onnx' now
+  void SetModelPath(const std::string& model_path,
+                    const std::string& params_path = "",
+                    const std::string& _model_format = "paddle");
+
   // set model inference in CPU
   void UseCpu();
diff --git a/fastdeploy/pybind/fastdeploy_runtime.cc b/fastdeploy/pybind/fastdeploy_runtime.cc
index 3255af0d926..5f27509cafd 100644
--- a/fastdeploy/pybind/fastdeploy_runtime.cc
+++ b/fastdeploy/pybind/fastdeploy_runtime.cc
@@ -19,6 +19,7 @@ namespace fastdeploy {
 void BindRuntime(pybind11::module& m) {
   pybind11::class_<RuntimeOption>(m, "RuntimeOption")
       .def(pybind11::init())
+      .def("set_model_path", &RuntimeOption::SetModelPath)
       .def("use_gpu", &RuntimeOption::UseGpu)
       .def("use_cpu", &RuntimeOption::UseCpu)
       .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
diff --git a/fastdeploy/utils/utils.cc b/fastdeploy/utils/utils.cc
index 95e94c4fec9..dfe5326d12b 100644
--- a/fastdeploy/utils/utils.cc
+++ b/fastdeploy/utils/utils.cc
@@ -16,9 +16,6 @@
 
 namespace fastdeploy {
 
-bool DISABLE_WARNING = false;
-bool DISABLE_INFO = false;
-
 FDLogger::FDLogger(bool verbose, const std::string& prefix) {
   verbose_ = verbose;
   line_ = "";
diff --git a/fastdeploy/utils/utils.h b/fastdeploy/utils/utils.h
index 89dcb57104a..f427cd7a3b8 100644
--- a/fastdeploy/utils/utils.h
+++ b/fastdeploy/utils/utils.h
@@ -33,9 +33,6 @@
 
 namespace fastdeploy {
 
-extern bool DISABLE_WARNING;
-extern bool DISABLE_INFO;
-
 class FASTDEPLOY_DECL FDLogger {
  public:
   FDLogger() {
diff --git a/model_zoo/vision/ppyoloe/cpp/ppyoloe.cc b/model_zoo/vision/ppyoloe/cpp/ppyoloe.cc
index 70162b1fc39..e63f29e62a5 100644
--- a/model_zoo/vision/ppyoloe/cpp/ppyoloe.cc
+++ b/model_zoo/vision/ppyoloe/cpp/ppyoloe.cc
@@ -23,10 +23,7 @@ int main() {
   std::string img_path = "000000014439_640x640.jpg";
   std::string vis_path = "vis.jpeg";
 
-  auto option = fastdeploy::RuntimeOption();
-  option.backend = fastdeploy::Backend::TRT;
-  auto model =
-      vis::ppdet::PPYOLOE(model_file, params_file, config_file, option);
+  auto model = vis::ppdet::PPYOLOE(model_file, params_file, config_file);
   if (!model.Initialized()) {
     std::cerr << "Init Failed." << std::endl;
     return -1;
diff --git a/model_zoo/vision/yolov5/cpp/yolov5.cc b/model_zoo/vision/yolov5/cpp/yolov5.cc
index 73a3cb643d7..dddcee843a7 100644
--- a/model_zoo/vision/yolov5/cpp/yolov5.cc
+++ b/model_zoo/vision/yolov5/cpp/yolov5.cc
@@ -16,11 +16,7 @@
 int main() {
   namespace vis = fastdeploy::vision;
 
-  auto option = fastdeploy::RuntimeOption();
-  option.UseTrtBackend();
-  option.SetTrtInputShape("images", {1, 3, 320, 320}, {1, 3, 640, 640},
-                          {1, 3, 1280, 1280});
-  auto model = vis::ultralytics::YOLOv5("yolov5s.onnx", "", option);
+  auto model = vis::ultralytics::YOLOv5("yolov5s.onnx");
   if (!model.Initialized()) {
     std::cerr << "Init Failed." << std::endl;
     return -1;
diff --git a/model_zoo/vision/yolox/cpp/yolox.cc b/model_zoo/vision/yolox/cpp/yolox.cc
index 86948e21651..934a50bea8e 100644
--- a/model_zoo/vision/yolox/cpp/yolox.cc
+++ b/model_zoo/vision/yolox/cpp/yolox.cc
@@ -16,11 +16,7 @@
 int main() {
   namespace vis = fastdeploy::vision;
 
-  auto option = fastdeploy::RuntimeOption();
-  option.device = fastdeploy::Device::GPU;
-  option.backend = fastdeploy::Backend::TRT;
-  option.trt_fixed_shape["images"] = {1, 3, 640, 640};
-  auto model = vis::megvii::YOLOX("yolox_s.onnx", "", option);
+  auto model = vis::megvii::YOLOX("yolox_s.onnx");
   if (!model.Initialized()) {
     std::cerr << "Init Failed." << std::endl;
     return -1;
diff --git a/model_zoo/vision/yolox/yolox.py b/model_zoo/vision/yolox/yolox.py
index 085110c1304..b63675049b1 100644
--- a/model_zoo/vision/yolox/yolox.py
+++ b/model_zoo/vision/yolox/yolox.py
@@ -8,9 +8,7 @@
 fd.download(test_jpg_url, ".", show_progress=True)
 
 # Load the model
-option = fd.RuntimeOption()
-option.use_trt_backend()
-model = fd.vision.megvii.YOLOX("yolox_s.onnx", runtime_option=option)
+model = fd.vision.megvii.YOLOX("yolox_s.onnx")
 
 # Run prediction on the image
 im = cv2.imread("bus.jpg")
From e69b051a4aef6bdde7353f9253e8e1fc18eea337 Mon Sep 17 00:00:00 2001
From: jiangjiajun
Date: Sun, 24 Jul 2022 13:03:43 +0000
Subject: [PATCH 5/6] fix bug

---
 fastdeploy/fastdeploy_runtime.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fastdeploy/fastdeploy_runtime.cc b/fastdeploy/fastdeploy_runtime.cc
index d1fef711f8f..19f0740ab4f 100644
--- a/fastdeploy/fastdeploy_runtime.cc
+++ b/fastdeploy/fastdeploy_runtime.cc
@@ -122,10 +122,10 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
   if (_model_format == "paddle") {
     model_file = model_path;
     params_file = params_path;
-    model_format = _model_format;
+    model_format = Frontend::PADDLE;
   } else if (_model_format == "onnx") {
     model_file = model_path;
-    model_format = _model_format;
+    model_format = Frontend::ONNX;
   } else {
     FDASSERT << "The model format only can be 'paddle' or 'onnx'." << std::endl;
   }
From dac68cbadd053e7a17959fb2a2746dfc8a60c8c5 Mon Sep 17 00:00:00 2001
From: jiangjiajun
Date: Sun, 24 Jul 2022 13:07:28 +0000
Subject: [PATCH 6/6] fix bug

---
 fastdeploy/fastdeploy_runtime.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastdeploy/fastdeploy_runtime.cc b/fastdeploy/fastdeploy_runtime.cc
index 19f0740ab4f..05af6e14e34 100644
--- a/fastdeploy/fastdeploy_runtime.cc
+++ b/fastdeploy/fastdeploy_runtime.cc
@@ -127,7 +127,7 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
     model_file = model_path;
     model_format = Frontend::ONNX;
   } else {
-    FDASSERT << "The model format only can be 'paddle' or 'onnx'." << std::endl;
+    FDASSERT(false, "The model format can only be 'paddle' or 'onnx'.");
   }
 }
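For reference, a minimal end-to-end sketch that exercises the API as it stands at the end of this series (model paths and the thread count are placeholders):

// Sketch only: build a Runtime directly with the options added in this series.
#include "fastdeploy/fastdeploy_runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams", "paddle");
  option.UseCpu();
  option.SetCpuThreadNum(4);
  option.UsePaddleBackend();   // asserts if Paddle Inference is not compiled in
  option.EnablePaddleMKLDNN();

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }
  // Inputs and outputs are exchanged as fastdeploy::FDTensor, exactly as in
  // the pybind "infer" binding above.
  return 0;
}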