From 018b9025e9c2b63b3c190c65f66a2d83c4af6442 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Tue, 1 Nov 2022 12:04:12 +0000 Subject: [PATCH 01/50] add paddle_trt in benchmark --- benchmark/README.md | 28 ++++++++++++++--- benchmark/benchmark_ppcls.py | 52 +++++++++++++++++++------------ benchmark/benchmark_ppdet.py | 52 +++++++++++++++++++------------ benchmark/benchmark_ppseg.py | 52 +++++++++++++++++++------------ benchmark/benchmark_yolo.py | 53 +++++++++++++++++++------------- benchmark/convert_info.py | 48 ++++++++++++++++++++++------- benchmark/requirements.txt | 2 -- benchmark/run_benchmark_ppcls.sh | 24 ++++++++------- benchmark/run_benchmark_ppdet.sh | 24 ++++++++------- benchmark/run_benchmark_ppseg.sh | 24 ++++++++------- benchmark/run_benchmark_yolo.sh | 16 +++++----- 11 files changed, 236 insertions(+), 139 deletions(-) diff --git a/benchmark/README.md b/benchmark/README.md index f01214ee65f..b1f96c1bea3 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -2,7 +2,7 @@ 在跑benchmark前,需确认以下两个步骤 -* 1. 软硬件环境满足要求,参考[FastDeploy环境要求](..//docs/cn/build_and_install/download_prebuilt_libraries.md) +* 1. 软硬件环境满足要求,参考[FastDeploy环境要求](..//docs/cn/build_and_install/download_prebuilt_libraries.md) * 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../docs/cn/build_and_install/download_prebuilt_libraries.md) FastDeploy 目前支持多种推理后端,下面以 PaddleClas MobileNetV1 为例,跑出多后端在 CPU/GPU 对应 benchmark 数据 @@ -29,6 +29,12 @@ python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val # Paddle Inference python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle +# Paddle Inference + TensorRT +python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt + +# Paddle Inference + TensorRT fp16 +python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_trt_fp16 True + # ONNX Runtime python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort @@ -50,8 +56,9 @@ python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val | --cpu_num_thread | CPU 线程数 | | --device_id | GPU 卡号 | | --iter_num | 跑 benchmark 的迭代次数 | -| --backend | 指定后端类型,有ort, ov, trt, paddle四个选项 | -| --enable_trt_fp16 | 当后端为trt时,是否开启fp16 | +| --backend | 指定后端类型,有ort, ov, trt, paddle, paddle_trt 五个选项 | +| --enable_trt_fp16 | 当后端为trt或paddle_trt时,是否开启fp16 | +| --enable_collect_memory_info | 是否记录 cpu/gpu memory信息,默认 False | **最终txt结果** @@ -62,7 +69,7 @@ python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val cat *.txt >> ./result_ppcls.txt # 结构化信息 -python convert_info.py --txt_path result_ppcls.txt --domain ppcls +python convert_info.py --txt_path result_ppcls.txt --domain ppcls --enable_collect_memory_info True ``` 得到 CPU 结果```struct_cpu_ppcls.txt```以及 GPU 结果```struct_gpu_ppcls.txt```如下所示 @@ -89,3 +96,16 @@ sh run_benchmark_ppcls.sh ``` 一键得到所有模型在 CPU 以及 GPU 的 benchmark 数据 + + +**添加新设备** + +如果添加了一种新设备,想进行 benchmark 测试,以```ipu```为例 + +在对应 benchmark 脚本```--device```中加入```ipu```选项,并通过```option.use_ipu()```进行开启 + +输入下列命令,进行 benchmark 测试 + +```shell +python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --iter_num 2000 --backend paddle --device ipu +``` diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py 
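The "添加新设备" (add a new device) steps in the README above can be sketched in code. A minimal illustration, assuming a `build_option()` helper shaped like the ones in the benchmark scripts of this patch; the `option.use_ipu()` switch and the paddle-backend pairing are taken from the README example itself:

```python
import fastdeploy as fd

# Minimal sketch of wiring an `ipu` device choice into build_option();
# device/backend names mirror the benchmark scripts in this patch.
def build_option(device, backend, cpu_num_thread=1):
    option = fd.RuntimeOption()
    option.set_cpu_thread_num(cpu_num_thread)
    if device == "ipu":
        # The README example pairs --device ipu with --backend paddle.
        assert backend == "paddle", "the ipu device requires backend==paddle"
        option.use_ipu()
        option.use_paddle_backend()
    return option
```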
index 410f20e4129..5b05bfe6687 100644 --- a/benchmark/benchmark_ppcls.py +++ b/benchmark/benchmark_ppcls.py @@ -51,12 +51,17 @@ def parse_arguments(): "--backend", type=str, default="ort", - help="inference backend, ort, ov, trt, paddle.") + help="inference backend, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", type=bool, default=False, help="whether enable fp16 in trt backend") + parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() return args @@ -69,9 +74,11 @@ def build_option(args): if device == "gpu": option.use_gpu(args.device_id) - if backend == "trt": + if backend == "trt" or backend == "paddle_trt": assert device == "gpu", "the trt backend need device==gpu" option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() if args.enable_trt_fp16: option.enable_trt_fp16() elif backend == "ov": @@ -130,45 +137,50 @@ def get_current_gputil(gpu_id): else: file_path = args.model + "_model_" + args.backend + "_" + args.device + ".txt" f = open(file_path, "w") - f.writelines("===={}====: \n".format(file_path.split("/")[1][:-4])) + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) try: model = fd.vision.classification.PaddleClasModel( model_file, params_file, config_file, runtime_option=option) model.enable_record_time_of_runtime() + im_ori = cv2.imread(args.image) for i in range(args.iter_num): - im = cv2.imread(args.image) + im = im_ori start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - gpu_util.append(get_current_gputil(gpu_id)) - cm, gm = get_current_memory_mb(gpu_id) - cpu_mem.append(cm) - gpu_mem.append(gm) + if args.enable_collect_memory_info: + gpu_util.append(get_current_gputil(gpu_id)) + cm, gm = get_current_memory_mb(gpu_id) + cpu_mem.append(cm) + gpu_mem.append(gm) runtime_statis = model.print_statis_info_of_runtime() warmup_iter = args.iter_num // 5 - repeat_iter = args.iter_num - warmup_iter end2end_statis_repeat = end2end_statis[warmup_iter:] - cpu_mem_repeat = cpu_mem[warmup_iter:] - gpu_mem_repeat = gpu_mem[warmup_iter:] - gpu_util_repeat = gpu_util[warmup_iter:] + if args.enable_collect_memory_info: + cpu_mem_repeat = cpu_mem[warmup_iter:] + gpu_mem_repeat = gpu_mem[warmup_iter:] + gpu_util_repeat = gpu_util[warmup_iter:] dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) - dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) - dump_result["gpu_util"] = np.mean(gpu_util_repeat) + if args.enable_collect_memory_info: + dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) + dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) + dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - f.writelines("cpu_rss_mb: {} \n".format( - str(dump_result["cpu_rss_mb"]))) - f.writelines("gpu_rss_mb: {} \n".format( - str(dump_result["gpu_rss_mb"]))) - f.writelines("gpu_util: {} \n".format(str(dump_result["gpu_util"]))) + if args.enable_collect_memory_info: + f.writelines("cpu_rss_mb: {} \n".format( + str(dump_result["cpu_rss_mb"]))) + f.writelines("gpu_rss_mb: {} \n".format( + str(dump_result["gpu_rss_mb"]))) + f.writelines("gpu_util: {} \n".format( + str(dump_result["gpu_util"]))) except: 
f.writelines("!!!!!Infer Failed\n") diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index 6b2f946f946..2f192de065b 100644 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -53,12 +53,17 @@ def parse_arguments(): "--backend", type=str, default="ort", - help="inference backend, ort, ov, trt, paddle.") + help="inference backend, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", type=bool, default=False, help="whether enable fp16 in trt backend") + parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() return args @@ -71,9 +76,11 @@ def build_option(args): if device == "gpu": option.use_gpu(args.device_id) - if backend == "trt": + if backend == "trt" or backend == "paddle_trt": assert device == "gpu", "the trt backend need device==gpu" option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() if args.enable_trt_fp16: option.enable_trt_fp16() elif backend == "ov": @@ -131,7 +138,7 @@ def get_current_gputil(gpu_id): else: file_path = args.model + "_model_" + args.backend + "_" + args.device + ".txt" f = open(file_path, "w") - f.writelines("===={}====: \n".format(file_path.split("/")[1][:-4])) + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) try: if "ppyoloe" in args.model: @@ -156,39 +163,44 @@ def get_current_gputil(gpu_id): raise Exception("model {} not support now in ppdet series".format( args.model)) model.enable_record_time_of_runtime() + im_ori = cv2.imread(args.image) for i in range(args.iter_num): - im = cv2.imread(args.image) + im = im_ori start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - gpu_util.append(get_current_gputil(gpu_id)) - cm, gm = get_current_memory_mb(gpu_id) - cpu_mem.append(cm) - gpu_mem.append(gm) + if args.enable_collect_memory_info: + gpu_util.append(get_current_gputil(gpu_id)) + cm, gm = get_current_memory_mb(gpu_id) + cpu_mem.append(cm) + gpu_mem.append(gm) runtime_statis = model.print_statis_info_of_runtime() warmup_iter = args.iter_num // 5 - repeat_iter = args.iter_num - warmup_iter end2end_statis_repeat = end2end_statis[warmup_iter:] - cpu_mem_repeat = cpu_mem[warmup_iter:] - gpu_mem_repeat = gpu_mem[warmup_iter:] - gpu_util_repeat = gpu_util[warmup_iter:] + if args.enable_collect_memory_info: + cpu_mem_repeat = cpu_mem[warmup_iter:] + gpu_mem_repeat = gpu_mem[warmup_iter:] + gpu_util_repeat = gpu_util[warmup_iter:] dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) - dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) - dump_result["gpu_util"] = np.mean(gpu_util_repeat) + if args.enable_collect_memory_info: + dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) + dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) + dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - f.writelines("cpu_rss_mb: {} \n".format( - str(dump_result["cpu_rss_mb"]))) - f.writelines("gpu_rss_mb: {} \n".format( - str(dump_result["gpu_rss_mb"]))) - f.writelines("gpu_util: {} \n".format(str(dump_result["gpu_util"]))) + if args.enable_collect_memory_info: + f.writelines("cpu_rss_mb: {} \n".format( + str(dump_result["cpu_rss_mb"]))) + 
f.writelines("gpu_rss_mb: {} \n".format( + str(dump_result["gpu_rss_mb"]))) + f.writelines("gpu_util: {} \n".format( + str(dump_result["gpu_util"]))) except: f.writelines("!!!!!Infer Failed\n") diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py index 7c118cec525..81e0db797aa 100644 --- a/benchmark/benchmark_ppseg.py +++ b/benchmark/benchmark_ppseg.py @@ -51,12 +51,17 @@ def parse_arguments(): "--backend", type=str, default="ort", - help="inference backend, ort, ov, trt, paddle.") + help="inference backend, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", type=bool, default=False, help="whether enable fp16 in trt backend") + parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() return args @@ -69,9 +74,11 @@ def build_option(args): if device == "gpu": option.use_gpu(args.device_id) - if backend == "trt": + if backend == "trt" or backend == "paddle_trt": assert device == "gpu", "the trt backend need device==gpu" option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() if args.enable_trt_fp16: option.enable_trt_fp16() elif backend == "ov": @@ -129,45 +136,50 @@ def get_current_gputil(gpu_id): else: file_path = args.model + "_model_" + args.backend + "_" + args.device + ".txt" f = open(file_path, "w") - f.writelines("===={}====: \n".format(file_path.split("/")[1][:-4])) + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) try: model = fd.vision.segmentation.PaddleSegModel( model_file, params_file, config_file, runtime_option=option) model.enable_record_time_of_runtime() + im_ori = cv2.imread(args.image) for i in range(args.iter_num): - im = cv2.imread(args.image) + im = im_ori start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - gpu_util.append(get_current_gputil(gpu_id)) - cm, gm = get_current_memory_mb(gpu_id) - cpu_mem.append(cm) - gpu_mem.append(gm) + if args.enable_collect_memory_info: + gpu_util.append(get_current_gputil(gpu_id)) + cm, gm = get_current_memory_mb(gpu_id) + cpu_mem.append(cm) + gpu_mem.append(gm) runtime_statis = model.print_statis_info_of_runtime() warmup_iter = args.iter_num // 5 - repeat_iter = args.iter_num - warmup_iter end2end_statis_repeat = end2end_statis[warmup_iter:] - cpu_mem_repeat = cpu_mem[warmup_iter:] - gpu_mem_repeat = gpu_mem[warmup_iter:] - gpu_util_repeat = gpu_util[warmup_iter:] + if args.enable_collect_memory_info: + cpu_mem_repeat = cpu_mem[warmup_iter:] + gpu_mem_repeat = gpu_mem[warmup_iter:] + gpu_util_repeat = gpu_util[warmup_iter:] dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) - dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) - dump_result["gpu_util"] = np.mean(gpu_util_repeat) + if args.enable_collect_memory_info: + dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) + dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) + dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - f.writelines("cpu_rss_mb: {} \n".format( - str(dump_result["cpu_rss_mb"]))) - f.writelines("gpu_rss_mb: {} \n".format( - str(dump_result["gpu_rss_mb"]))) - f.writelines("gpu_util: {} \n".format(str(dump_result["gpu_util"]))) + if 
args.enable_collect_memory_info: + f.writelines("cpu_rss_mb: {} \n".format( + str(dump_result["cpu_rss_mb"]))) + f.writelines("gpu_rss_mb: {} \n".format( + str(dump_result["gpu_rss_mb"]))) + f.writelines("gpu_util: {} \n".format( + str(dump_result["gpu_util"]))) except: f.writelines("!!!!!Infer Failed\n") diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py index 81a87323c04..65e89a516a1 100644 --- a/benchmark/benchmark_yolo.py +++ b/benchmark/benchmark_yolo.py @@ -53,12 +53,17 @@ def parse_arguments(): "--backend", type=str, default="ort", - help="inference backend, ort, ov, trt, paddle.") + help="inference backend, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", type=bool, default=False, help="whether enable fp16 in trt backend") + parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() return args @@ -71,9 +76,11 @@ def build_option(args): if device == "gpu": option.use_gpu(args.device_id) - if backend == "trt": + if backend == "trt" or backend == "paddle_trt": assert device == "gpu", "the trt backend need device==gpu" option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() if args.enable_trt_fp16: option.enable_trt_fp16() elif backend == "ov": @@ -129,7 +136,7 @@ def get_current_gputil(gpu_id): else: file_path = args.model + "_model_" + args.backend + "_" + args.device + ".txt" f = open(file_path, "w") - f.writelines("===={}====: \n".format(file_path.split("/")[1][:-4])) + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) try: if "yolox" in model_file: @@ -148,40 +155,44 @@ def get_current_gputil(gpu_id): raise Exception("model {} not support now in yolo series".format( args.model)) model.enable_record_time_of_runtime() - + im_ori = cv2.imread(args.image) for i in range(args.iter_num): - im = cv2.imread(args.image) + im = im_ori start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - gpu_util.append(get_current_gputil(gpu_id)) - cm, gm = get_current_memory_mb(gpu_id) - cpu_mem.append(cm) - gpu_mem.append(gm) + if args.enable_collect_memory_info: + gpu_util.append(get_current_gputil(gpu_id)) + cm, gm = get_current_memory_mb(gpu_id) + cpu_mem.append(cm) + gpu_mem.append(gm) runtime_statis = model.print_statis_info_of_runtime() warmup_iter = args.iter_num // 5 - repeat_iter = args.iter_num - warmup_iter end2end_statis_repeat = end2end_statis[warmup_iter:] - cpu_mem_repeat = cpu_mem[warmup_iter:] - gpu_mem_repeat = gpu_mem[warmup_iter:] - gpu_util_repeat = gpu_util[warmup_iter:] + if args.enable_collect_memory_info: + cpu_mem_repeat = cpu_mem[warmup_iter:] + gpu_mem_repeat = gpu_mem[warmup_iter:] + gpu_util_repeat = gpu_util[warmup_iter:] dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) - dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) - dump_result["gpu_util"] = np.mean(gpu_util_repeat) + if args.enable_collect_memory_info: + dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) + dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) + dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - f.writelines("cpu_rss_mb: {} \n".format( - str(dump_result["cpu_rss_mb"]))) - 
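# ---------------------------------------------------------------------------
# Illustrative note (not part of this patch): with the guards above, each
# benchmark run writes a small txt report that convert_info.py parses line
# by line. For a GPU run the layout is as below; the three memory lines
# appear only when --enable_collect_memory_info True, and CPU file names
# additionally encode the thread count (e.g. the "ort_cpu_1" key matched by
# convert_info.py):
#
#   ====<model>_model_<backend>_gpu====:
#   Runtime(ms): <float>
#   End2End(ms): <float>
#   cpu_rss_mb: <float>
#   gpu_rss_mb: <float>
#   gpu_util: <float>
# ---------------------------------------------------------------------------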
f.writelines("gpu_rss_mb: {} \n".format( - str(dump_result["gpu_rss_mb"]))) - f.writelines("gpu_util: {} \n".format(str(dump_result["gpu_util"]))) + if args.enable_collect_memory_info: + f.writelines("cpu_rss_mb: {} \n".format( + str(dump_result["cpu_rss_mb"]))) + f.writelines("gpu_rss_mb: {} \n".format( + str(dump_result["gpu_rss_mb"]))) + f.writelines("gpu_util: {} \n".format( + str(dump_result["gpu_util"]))) except: f.writelines("!!!!!Infer Failed\n") diff --git a/benchmark/convert_info.py b/benchmark/convert_info.py index 893ffecbec8..338a0cec582 100644 --- a/benchmark/convert_info.py +++ b/benchmark/convert_info.py @@ -18,9 +18,15 @@ parser = argparse.ArgumentParser(description='manual to this script') parser.add_argument('--txt_path', type=str, default='result.txt') parser.add_argument('--domain', type=str, default='ppcls') +parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() txt_path = args.txt_path domain = args.domain +enable_collect_memory_info = args.enable_collect_memory_info f1 = open(txt_path, "r") lines = f1.readlines() @@ -33,6 +39,8 @@ paddle_cpu_thread1 = dict() paddle_cpu_thread8 = dict() paddle_gpu = dict() +paddle_trt_gpu = dict() +paddle_trt_gpu_fp16 = dict() trt_gpu = dict() trt_gpu_fp16 = dict() model_name_set = set() @@ -43,6 +51,8 @@ model_name_set.add(model_name) runtime = "-" end2end = "-" + cpu_rss_mb = "-" + gpu_rss_mb = "-" if "Runtime(ms)" in lines[i + 1]: runtime_ori = lines[i + 1].split(": ")[1] # two decimal places @@ -53,16 +63,17 @@ # two decimal places end2end_list = end2end_ori.split(".") end2end = end2end_list[0] + "." + end2end_list[1][:2] - if "cpu_rss_mb" in lines[i + 3]: - cpu_rss_mb_ori = lines[i + 3].split(": ")[1] - # two decimal places - cpu_rss_mb_list = cpu_rss_mb_ori.split(".") - cpu_rss_mb = cpu_rss_mb_list[0] + "." + cpu_rss_mb_list[1][:2] - if "gpu_rss_mb" in lines[i + 4]: - gpu_rss_mb_ori = lines[i + 4].split(": ")[1] - # two decimal places - gpu_rss_mb_list = gpu_rss_mb_ori.split(".") - gpu_rss_mb = gpu_rss_mb_list[0] + "." + gpu_rss_mb_list[1][:2] + if enable_collect_memory_info: + if "cpu_rss_mb" in lines[i + 3]: + cpu_rss_mb_ori = lines[i + 3].split(": ")[1] + # two decimal places + cpu_rss_mb_list = cpu_rss_mb_ori.split(".") + cpu_rss_mb = cpu_rss_mb_list[0] + "." + cpu_rss_mb_list[1][:2] + if "gpu_rss_mb" in lines[i + 4]: + gpu_rss_mb_ori = lines[i + 4].split(": ")[1] + # two decimal places + gpu_rss_mb_list = gpu_rss_mb_ori.split(".") + gpu_rss_mb = gpu_rss_mb_list[0] + "." 
+ gpu_rss_mb_list[1][:2] if "ort_cpu_1" in lines[i]: ort_cpu_thread1[ model_name] = runtime + "\t" + end2end + "\t" + cpu_rss_mb @@ -86,6 +97,12 @@ elif "paddle_gpu" in lines[i]: paddle_gpu[ model_name] = runtime + "\t" + end2end + "\t" + gpu_rss_mb + elif "paddle_trt_gpu" in lines[i]: + paddle_trt_gpu[ + model_name] = runtime + "\t" + end2end + "\t" + gpu_rss_mb + elif "paddle_trt_fp16_gpu" in lines[i]: + paddle_trt_gpu_fp16[ + model_name] = runtime + "\t" + end2end + "\t" + gpu_rss_mb elif "trt_gpu" in lines[i]: trt_gpu[model_name] = runtime + "\t" + end2end + "\t" + gpu_rss_mb elif "trt_fp16_gpu" in lines[i]: @@ -131,7 +148,7 @@ f3 = open("struct_gpu_" + domain + ".txt", "w") f3.writelines( - "model_name\tort_run\tort_end2end\tgpu_rss_mb\tpaddle_run\tpaddle_end2end\tgpu_rss_mb\ttrt_run\ttrt_end2end\tgpu_rss_mb\ttrt_fp16_run\ttrt_fp16_end2end\tgpu_rss_mb\n" + "model_name\tort_run\tort_end2end\tgpu_rss_mb\tpaddle_run\tpaddle_end2end\tgpu_rss_mb\tpaddle_trt_run\tpaddle_trt_end2end\tgpu_rss_mb\tpaddle_trt_fp16_run\tpaddle_trt_fp16_end2end\tgpu_rss_mb\ttrt_run\ttrt_end2end\tgpu_rss_mb\ttrt_fp16_run\ttrt_fp16_end2end\tgpu_rss_mb\n" ) for model_name in model_name_set: lines1 = model_name + '\t' @@ -143,6 +160,15 @@ lines1 += paddle_gpu[model_name] + '\t' else: lines1 += "-\t-\t-\t" + if model_name in paddle_trt_gpu and paddle_trt_gpu[model_name] != "": + lines1 += paddle_trt_gpu[model_name] + '\t' + else: + lines1 += "-\t-\t-\t" + if model_name in paddle_trt_gpu_fp16 and paddle_trt_gpu_fp16[ + model_name] != "": + lines1 += paddle_trt_gpu_fp16[model_name] + '\t' + else: + lines1 += "-\t-\t-\t" if model_name in trt_gpu and trt_gpu[model_name] != "": lines1 += trt_gpu[model_name] + '\t' else: diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt index 9f1d255ea7b..9d78d39fede 100644 --- a/benchmark/requirements.txt +++ b/benchmark/requirements.txt @@ -2,5 +2,3 @@ numpy pynvml psutil GPUtil -time -numpy diff --git a/benchmark/run_benchmark_ppcls.sh b/benchmark/run_benchmark_ppcls.sh index 71e33a93194..c82c0ac0106 100644 --- a/benchmark/run_benchmark_ppcls.sh +++ b/benchmark/run_benchmark_ppcls.sh @@ -6,16 +6,18 @@ counter=1 for model in $(ls -d ppcls_model/* ) do echo "[Benchmark-PPcls] ${counter}/${num_of_models} $model ..." 
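# ---------------------------------------------------------------------------
# Illustrative note (not part of this patch): the rewritten loop below sweeps
# each model over CPU at 1 and 8 threads (ort/paddle/ov) and over GPU
# (ort/paddle/paddle_trt/trt, the two TRT backends again with
# --enable_trt_fp16 True), now appending --enable_collect_memory_info True
# to every run. The boolean flags must be spelled True/False: patch 03/50
# switches their parsing to ast.literal_eval, which accepts Python literals
# but rejects a lowercase "true". A single hypothetical smoke run before the
# full 2000-iteration sweep could be:
#   python benchmark_ppcls.py --model ppcls_model/MobileNetV1_x0_25_infer \
#       --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 100 \
#       --backend paddle_trt --enable_collect_memory_info True
# ---------------------------------------------------------------------------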
- python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ort - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ort - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend paddle - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend paddle - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ov - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ov - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_trt_fp16 True --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True --enable_collect_memory_info True counter=$(($counter+1)) step=$(( $counter % 1 )) if [ $step = 0 ] @@ -30,4 +32,4 @@ rm -rf result_ppcls.txt touch result_ppcls.txt cat ppcls_model/*.txt >> ./result_ppcls.txt -python convert_info.py --txt_path result_ppcls.txt --domain ppcls +python convert_info.py 
--txt_path result_ppcls.txt --domain ppcls --enable_collect_memory_info True diff --git a/benchmark/run_benchmark_ppdet.sh b/benchmark/run_benchmark_ppdet.sh index 55f7de39f20..3b27d350265 100644 --- a/benchmark/run_benchmark_ppdet.sh +++ b/benchmark/run_benchmark_ppdet.sh @@ -6,16 +6,18 @@ counter=1 for model in $(ls -d ppdet_model/* ) do echo "[Benchmark-PPdet] ${counter}/${num_of_models} $model ..." - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ort - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ort - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend paddle - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend paddle - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ov - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ov - python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend ort - python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend paddle - python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt - python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend paddle_trt --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend paddle_trt --enable_trt_fp16 True --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True --enable_collect_memory_info True counter=$(($counter+1)) step=$(( $counter % 1 )) if [ $step = 0 ] @@ -30,4 +32,4 @@ rm -rf result_ppdet.txt touch 
result_ppdet.txt cat ppdet_model/*.txt >> ./result_ppdet.txt -python convert_info.py --txt_path result_ppdet.txt --domain ppdet +python convert_info.py --txt_path result_ppdet.txt --domain ppdet --enable_collect_memory_info True diff --git a/benchmark/run_benchmark_ppseg.sh b/benchmark/run_benchmark_ppseg.sh index e878c1529de..1964e80ebc2 100644 --- a/benchmark/run_benchmark_ppseg.sh +++ b/benchmark/run_benchmark_ppseg.sh @@ -6,16 +6,18 @@ counter=1 for model in $(ls -d ppseg_model/* ) do echo "[Benchmark-PPseg] ${counter}/${num_of_models} $model ..." - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ort - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ort - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend paddle - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend paddle - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ov - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ov - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_trt_fp16 True --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image 
ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True --enable_collect_memory_info True counter=$(($counter+1)) step=$(( $counter % 1 )) if [ $step = 0 ] @@ -30,4 +32,4 @@ rm -rf result_ppseg.txt touch result_ppseg.txt cat ppseg_model/*.txt >> ./result_ppseg.txt -python convert_info.py --txt_path result_ppseg.txt --domain ppseg +python convert_info.py --txt_path result_ppseg.txt --domain ppseg --enable_collect_memory_info True diff --git a/benchmark/run_benchmark_yolo.sh b/benchmark/run_benchmark_yolo.sh index 6f36c41eb57..7ec625fcc4b 100755 --- a/benchmark/run_benchmark_yolo.sh +++ b/benchmark/run_benchmark_yolo.sh @@ -6,13 +6,13 @@ counter=1 for model in $(ls -d yolo_model/* ) do echo "[Benchmark-Yolo] ${counter}/${num_of_models} $model ..." - python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ort - python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ort - python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ov - python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ov - python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend ort - python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt - python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True + python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True --enable_collect_memory_info True counter=$(($counter+1)) step=$(( $counter % 1 )) if [ $step = 0 ] @@ -27,4 +27,4 @@ rm -rf result_yolo.txt touch result_yolo.txt cat yolo_model/*.txt >> ./result_yolo.txt -python convert_info.py --txt_path result_yolo.txt --domain yolo +python convert_info.py --txt_path result_yolo.txt --domain yolo --enable_collect_memory_info True From 00002346ce625702f43a89ae22837ae54cd175e6 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 2 Nov 2022 12:31:05 +0000 Subject: [PATCH 02/50] update benchmark in device --- benchmark/benchmark_ppcls.py | 80 ++++++++++++++++++++++++------------ benchmark/benchmark_ppdet.py | 80 ++++++++++++++++++++++++------------ benchmark/benchmark_ppseg.py | 80 ++++++++++++++++++++++++------------ 
benchmark/benchmark_yolo.py | 80 ++++++++++++++++++++++++------------ 4 files changed, 216 insertions(+), 104 deletions(-) diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py index 5b05bfe6687..914ace71b03 100644 --- a/benchmark/benchmark_ppcls.py +++ b/benchmark/benchmark_ppcls.py @@ -22,9 +22,19 @@ import time +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() == 'true': + return True + elif v.lower() == 'false': + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def parse_arguments(): import argparse - import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleClas model.") @@ -50,16 +60,16 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - default="ort", - help="inference backend, ort, ov, trt, paddle, paddle_trt.") + default="default", + help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=bool, + type=str2bool, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=bool, + type=str2bool, default=False, help="whether enable collect memory info") args = parser.parse_args() @@ -70,26 +80,43 @@ def build_option(args): option = fd.RuntimeOption() device = args.device backend = args.backend + enable_trt_fp16 = args.enable_trt_fp16 option.set_cpu_thread_num(args.cpu_num_thread) if device == "gpu": - option.use_gpu(args.device_id) - - if backend == "trt" or backend == "paddle_trt": - assert device == "gpu", "the trt backend need device==gpu" - option.use_trt_backend() - if backend == "paddle_trt": - option.enable_paddle_to_trt() - if args.enable_trt_fp16: - option.enable_trt_fp16() - elif backend == "ov": - assert device == "cpu", "the openvino backend need device==cpu" - option.use_openvino_backend() - elif backend == "paddle": - option.use_paddle_backend() - elif backend == "ort": - option.use_ort_backend() + option.use_gpu() + if backend == "ort": + option.use_ort_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend in ["trt", "paddle_trt"]: + option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() + if enable_trt_fp16: + option.enable_trt_fp16() + elif backend == "default": + return option + else: + raise Exception( + "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, {} is not supported.". + format(backend)) + elif device == "cpu": + if backend == "ort": + option.use_ort_backend() + elif backend == "ov": + option.use_openvino_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend == "default": + return option + else: + raise Exception( + "While inference with CPU, only support default/ort/ov/paddle now, {} is not supported.". 
+ format(backend)) else: - print("%s is an unsupported backend" % backend) + raise Exception( + "Only support device CPU/GPU now, {} is not supported.".format( + device)) return option @@ -123,6 +150,7 @@ def get_current_gputil(gpu_id): config_file = os.path.join(args.model, "inference_cls.yaml") gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info end2end_statis = list() cpu_mem = list() gpu_mem = list() @@ -149,7 +177,7 @@ def get_current_gputil(gpu_id): start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - if args.enable_collect_memory_info: + if enable_collect_memory_info: gpu_util.append(get_current_gputil(gpu_id)) cm, gm = get_current_memory_mb(gpu_id) cpu_mem.append(cm) @@ -159,7 +187,7 @@ def get_current_gputil(gpu_id): warmup_iter = args.iter_num // 5 end2end_statis_repeat = end2end_statis[warmup_iter:] - if args.enable_collect_memory_info: + if enable_collect_memory_info: cpu_mem_repeat = cpu_mem[warmup_iter:] gpu_mem_repeat = gpu_mem[warmup_iter:] gpu_util_repeat = gpu_util[warmup_iter:] @@ -167,14 +195,14 @@ def get_current_gputil(gpu_id): dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - if args.enable_collect_memory_info: + if enable_collect_memory_info: dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - if args.enable_collect_memory_info: + if enable_collect_memory_info: f.writelines("cpu_rss_mb: {} \n".format( str(dump_result["cpu_rss_mb"]))) f.writelines("gpu_rss_mb: {} \n".format( diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index 2f192de065b..cb8d47f44ea 100644 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -24,9 +24,19 @@ import time +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() == 'true': + return True + elif v.lower() == 'false': + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def parse_arguments(): import argparse - import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleDetection model.") @@ -52,16 +62,16 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - default="ort", - help="inference backend, ort, ov, trt, paddle, paddle_trt.") + default="default", + help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=bool, + type=str2bool, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=bool, + type=str2bool, default=False, help="whether enable collect memory info") args = parser.parse_args() @@ -72,26 +82,43 @@ def build_option(args): option = fd.RuntimeOption() device = args.device backend = args.backend + enable_trt_fp16 = args.enable_trt_fp16 option.set_cpu_thread_num(args.cpu_num_thread) if device == "gpu": - option.use_gpu(args.device_id) - - if backend == "trt" or backend == "paddle_trt": - assert device == "gpu", "the trt backend need device==gpu" - option.use_trt_backend() - if backend == "paddle_trt": - option.enable_paddle_to_trt() - if args.enable_trt_fp16: - option.enable_trt_fp16() - elif backend == "ov": - assert device == 
"cpu", "the openvino backend need device==cpu" - option.use_openvino_backend() - elif backend == "paddle": - option.use_paddle_backend() - elif backend == "ort": - option.use_ort_backend() + option.use_gpu() + if backend == "ort": + option.use_ort_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend in ["trt", "paddle_trt"]: + option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() + if enable_trt_fp16: + option.enable_trt_fp16() + elif backend == "default": + return option + else: + raise Exception( + "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, {} is not supported.". + format(backend)) + elif device == "cpu": + if backend == "ort": + option.use_ort_backend() + elif backend == "ov": + option.use_openvino_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend == "default": + return option + else: + raise Exception( + "While inference with CPU, only support default/ort/ov/paddle now, {} is not supported.". + format(backend)) else: - print("%s is an unsupported backend" % backend) + raise Exception( + "Only support device CPU/GPU now, {} is not supported.".format( + device)) return option @@ -125,6 +152,7 @@ def get_current_gputil(gpu_id): config_file = os.path.join(args.model, "infer_cfg.yml") gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info end2end_statis = list() cpu_mem = list() gpu_mem = list() @@ -169,7 +197,7 @@ def get_current_gputil(gpu_id): start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - if args.enable_collect_memory_info: + if enable_collect_memory_info: gpu_util.append(get_current_gputil(gpu_id)) cm, gm = get_current_memory_mb(gpu_id) cpu_mem.append(cm) @@ -179,7 +207,7 @@ def get_current_gputil(gpu_id): warmup_iter = args.iter_num // 5 end2end_statis_repeat = end2end_statis[warmup_iter:] - if args.enable_collect_memory_info: + if enable_collect_memory_info: cpu_mem_repeat = cpu_mem[warmup_iter:] gpu_mem_repeat = gpu_mem[warmup_iter:] gpu_util_repeat = gpu_util[warmup_iter:] @@ -187,14 +215,14 @@ def get_current_gputil(gpu_id): dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - if args.enable_collect_memory_info: + if enable_collect_memory_info: dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - if args.enable_collect_memory_info: + if enable_collect_memory_info: f.writelines("cpu_rss_mb: {} \n".format( str(dump_result["cpu_rss_mb"]))) f.writelines("gpu_rss_mb: {} \n".format( diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py index 81e0db797aa..2c7a37c2f14 100644 --- a/benchmark/benchmark_ppseg.py +++ b/benchmark/benchmark_ppseg.py @@ -22,9 +22,19 @@ import time +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() == 'true': + return True + elif v.lower() == 'false': + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def parse_arguments(): import argparse - import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleSeg model.") @@ -50,16 +60,16 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - 
default="ort", - help="inference backend, ort, ov, trt, paddle, paddle_trt.") + default="default", + help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=bool, + type=str2bool, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=bool, + type=str2bool, default=False, help="whether enable collect memory info") args = parser.parse_args() @@ -70,26 +80,43 @@ def build_option(args): option = fd.RuntimeOption() device = args.device backend = args.backend + enable_trt_fp16 = args.enable_trt_fp16 option.set_cpu_thread_num(args.cpu_num_thread) if device == "gpu": - option.use_gpu(args.device_id) - - if backend == "trt" or backend == "paddle_trt": - assert device == "gpu", "the trt backend need device==gpu" - option.use_trt_backend() - if backend == "paddle_trt": - option.enable_paddle_to_trt() - if args.enable_trt_fp16: - option.enable_trt_fp16() - elif backend == "ov": - assert device == "cpu", "the openvino backend need device==cpu" - option.use_openvino_backend() - elif backend == "paddle": - option.use_paddle_backend() - elif backend == "ort": - option.use_ort_backend() + option.use_gpu() + if backend == "ort": + option.use_ort_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend in ["trt", "paddle_trt"]: + option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() + if enable_trt_fp16: + option.enable_trt_fp16() + elif backend == "default": + return option + else: + raise Exception( + "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, {} is not supported.". + format(backend)) + elif device == "cpu": + if backend == "ort": + option.use_ort_backend() + elif backend == "ov": + option.use_openvino_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend == "default": + return option + else: + raise Exception( + "While inference with CPU, only support default/ort/ov/paddle now, {} is not supported.". 
+ format(backend)) else: - print("%s is an unsupported backend" % backend) + raise Exception( + "Only support device CPU/GPU now, {} is not supported.".format( + device)) return option @@ -123,6 +150,7 @@ def get_current_gputil(gpu_id): config_file = os.path.join(args.model, "deploy.yaml") gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info end2end_statis = list() cpu_mem = list() gpu_mem = list() @@ -148,7 +176,7 @@ def get_current_gputil(gpu_id): start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - if args.enable_collect_memory_info: + if enable_collect_memory_info: gpu_util.append(get_current_gputil(gpu_id)) cm, gm = get_current_memory_mb(gpu_id) cpu_mem.append(cm) @@ -158,7 +186,7 @@ def get_current_gputil(gpu_id): warmup_iter = args.iter_num // 5 end2end_statis_repeat = end2end_statis[warmup_iter:] - if args.enable_collect_memory_info: + if enable_collect_memory_info: cpu_mem_repeat = cpu_mem[warmup_iter:] gpu_mem_repeat = gpu_mem[warmup_iter:] gpu_util_repeat = gpu_util[warmup_iter:] @@ -166,14 +194,14 @@ def get_current_gputil(gpu_id): dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - if args.enable_collect_memory_info: + if enable_collect_memory_info: dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - if args.enable_collect_memory_info: + if enable_collect_memory_info: f.writelines("cpu_rss_mb: {} \n".format( str(dump_result["cpu_rss_mb"]))) f.writelines("gpu_rss_mb: {} \n".format( diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py index 65e89a516a1..f534c43f3e0 100644 --- a/benchmark/benchmark_yolo.py +++ b/benchmark/benchmark_yolo.py @@ -24,9 +24,19 @@ import time +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() == 'true': + return True + elif v.lower() == 'false': + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def parse_arguments(): import argparse - import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of Yolo onnx model.") @@ -52,16 +62,16 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - default="ort", - help="inference backend, ort, ov, trt, paddle, paddle_trt.") + default="default", + help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=bool, + type=str2bool, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=bool, + type=str2bool, default=False, help="whether enable collect memory info") args = parser.parse_args() @@ -72,26 +82,43 @@ def build_option(args): option = fd.RuntimeOption() device = args.device backend = args.backend + enable_trt_fp16 = args.enable_trt_fp16 option.set_cpu_thread_num(args.cpu_num_thread) if device == "gpu": - option.use_gpu(args.device_id) - - if backend == "trt" or backend == "paddle_trt": - assert device == "gpu", "the trt backend need device==gpu" - option.use_trt_backend() - if backend == "paddle_trt": - option.enable_paddle_to_trt() - if args.enable_trt_fp16: - option.enable_trt_fp16() - elif backend == "ov": - assert device == "cpu", "the openvino 
backend need device==cpu" - option.use_openvino_backend() - elif backend == "paddle": - option.use_paddle_backend() - elif backend == "ort": - option.use_ort_backend() + option.use_gpu() + if backend == "ort": + option.use_ort_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend in ["trt", "paddle_trt"]: + option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() + if enable_trt_fp16: + option.enable_trt_fp16() + elif backend == "default": + return option + else: + raise Exception( + "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, {} is not supported.". + format(backend)) + elif device == "cpu": + if backend == "ort": + option.use_ort_backend() + elif backend == "ov": + option.use_openvino_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend == "default": + return option + else: + raise Exception( + "While inference with CPU, only support default/ort/ov/paddle now, {} is not supported.". + format(backend)) else: - print("%s is an unsupported backend" % backend) + raise Exception( + "Only support device CPU/GPU now, {} is not supported.".format( + device)) return option @@ -123,6 +150,7 @@ def get_current_gputil(gpu_id): model_file = args.model gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info end2end_statis = list() cpu_mem = list() gpu_mem = list() @@ -161,7 +189,7 @@ def get_current_gputil(gpu_id): start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - if args.enable_collect_memory_info: + if enable_collect_memory_info: gpu_util.append(get_current_gputil(gpu_id)) cm, gm = get_current_memory_mb(gpu_id) cpu_mem.append(cm) @@ -171,7 +199,7 @@ def get_current_gputil(gpu_id): warmup_iter = args.iter_num // 5 end2end_statis_repeat = end2end_statis[warmup_iter:] - if args.enable_collect_memory_info: + if enable_collect_memory_info: cpu_mem_repeat = cpu_mem[warmup_iter:] gpu_mem_repeat = gpu_mem[warmup_iter:] gpu_util_repeat = gpu_util[warmup_iter:] @@ -179,14 +207,14 @@ def get_current_gputil(gpu_id): dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - if args.enable_collect_memory_info: + if enable_collect_memory_info: dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - if args.enable_collect_memory_info: + if enable_collect_memory_info: f.writelines("cpu_rss_mb: {} \n".format( str(dump_result["cpu_rss_mb"]))) f.writelines("gpu_rss_mb: {} \n".format( From 20ddf39303b51c786463d749e11c9e03f7a6550a Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 3 Nov 2022 03:05:05 +0000 Subject: [PATCH 03/50] update benchmark --- benchmark/benchmark_ppcls.py | 16 +++------------- benchmark/benchmark_ppdet.py | 16 +++------------- benchmark/benchmark_ppseg.py | 16 +++------------- benchmark/benchmark_yolo.py | 16 +++------------- 4 files changed, 12 insertions(+), 52 deletions(-) diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py index 914ace71b03..039a07cc9e1 100755 --- a/benchmark/benchmark_ppcls.py +++ b/benchmark/benchmark_ppcls.py @@ -22,19 +22,9 @@ import time -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() == 'true': - return True - 
elif v.lower() == 'false': - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - def parse_arguments(): import argparse + import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleClas model.") @@ -64,12 +54,12 @@ def parse_arguments(): help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable collect memory info") args = parser.parse_args() diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index cb8d47f44ea..6cabc4d4e99 100755 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -24,19 +24,9 @@ import time -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() == 'true': - return True - elif v.lower() == 'false': - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - def parse_arguments(): import argparse + import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleDetection model.") @@ -66,12 +56,12 @@ def parse_arguments(): help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable collect memory info") args = parser.parse_args() diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py index 2c7a37c2f14..ef57e371502 100755 --- a/benchmark/benchmark_ppseg.py +++ b/benchmark/benchmark_ppseg.py @@ -22,19 +22,9 @@ import time -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() == 'true': - return True - elif v.lower() == 'false': - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - def parse_arguments(): import argparse + import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleSeg model.") @@ -64,12 +54,12 @@ def parse_arguments(): help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable collect memory info") args = parser.parse_args() diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py index f534c43f3e0..aa6927c8337 100755 --- a/benchmark/benchmark_yolo.py +++ b/benchmark/benchmark_yolo.py @@ -24,19 +24,9 @@ import time -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() == 'true': - return True - elif v.lower() == 'false': - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - def parse_arguments(): import argparse + import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of Yolo onnx model.") @@ -66,12 +56,12 @@ def parse_arguments(): help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=str2bool, + 
type=ast.literal_eval, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable collect memory info") args = parser.parse_args() From 2ce400549d42898430d234b2b8592a88a4aa526c Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Fri, 4 Nov 2022 03:50:23 +0000 Subject: [PATCH 04/50] update result doc --- docs/api_docs/python/vision_results_cn.md | 17 +++++++++++++++++ docs/api_docs/python/vision_results_en.md | 20 ++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/docs/api_docs/python/vision_results_cn.md b/docs/api_docs/python/vision_results_cn.md index dab22e6a59c..19b2a6662da 100644 --- a/docs/api_docs/python/vision_results_cn.md +++ b/docs/api_docs/python/vision_results_cn.md @@ -16,6 +16,7 @@ API:`fastdeploy.vision.SegmentationResult`, 该结果返回: - **score_map**(list of float): 成员变量,与label_map一一对应的所预测的分割类别概率值(当导出模型时指定`--output_op argmax`)或者经过softmax归一化化后的概率值(当导出模型时指定`--output_op softmax`或者导出模型时指定`--output_op none`同时模型初始化的时候设置模型类成员属性`apply_softmax=true`). - **shape**(list of int): 成员变量,表示输出图片的尺寸,为`H*W`. + ## DetectionResult DetectionResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像检测出来的目标框、目标类别和目标置信度. @@ -40,6 +41,7 @@ API:`fastdeploy.vision.FaceDetectionResult` , 该结果返回: - **landmarks**(list of list(float)): 成员变量,表示单张图片检测出来的所有人脸的关键点. - **landmarks_per_face**(int): 成员变量,表示每个人脸框中的关键点的数量. + ## KeyPointDetectionResult KeyPointDetectionResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像中目标行为的各个关键点坐标和置信度。 @@ -70,6 +72,7 @@ API:`fastdeploy.vision.MattingResult`, 该结果返回: - **contain_foreground**(bool): 表示预测的结果是否包含前景. - **shape**(list of int): 表示输出结果的shape,当`contain_foreground`为`false`,shape只包含`(H,W)`,当`contain_foreground`为`true`,shape包含`(H,W,C)`, C一般为3. + ## OCRResult OCRResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像检测和识别出来的文本框,文本框方向分类,以及文本框内的文本内容. @@ -79,3 +82,17 @@ API:`fastdeploy.vision.OCRResult`, 该结果返回: - **rec_scores**(list of float): 成员变量,表示文本框内识别出来的文本的置信度,其元素个数与`boxes.size()`一致. - **cls_scores**(list of float): 成员变量,表示文本框的分类结果的置信度,其元素个数与`boxes.size()`一致. - **cls_labels**(list of int): 成员变量,表示文本框的方向分类类别,其元素个数与`boxes.size()`一致. + + +## FaceAlignmentResult +FaceAlignmentResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明人脸landmarks。 + +API:`fastdeploy.vision.FaceAlignmentResult`, 该结果返回: +- **landmarks**(list of list(float)): 成员变量,表示单张人脸图片检测出来的所有关键点 + + +## HeadPoseResult +HeadPoseResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明头部姿态结果。 + +API:`fastdeploy.vision.HeadPoseResult`, 该结果返回: +- **euler_angles**(list of float): 成员变量,表示单张人脸图片预测的欧拉角,存放的顺序是(yaw, pitch, roll), yaw 代表水平转角,pitch 代表垂直角,roll 代表翻滚角,值域都为 [-90, +90]度 diff --git a/docs/api_docs/python/vision_results_en.md b/docs/api_docs/python/vision_results_en.md index 513a011d7a3..cbf4e2d5ae1 100644 --- a/docs/api_docs/python/vision_results_en.md +++ b/docs/api_docs/python/vision_results_en.md @@ -10,6 +10,7 @@ API: `fastdeploy.vision.ClassifyResult`, The ClassifyResult will return: - **scores**(list of float):Member variables that indicate the confidence level of a single image on the corresponding classification result, the number of which is determined by the `topk` passed in when using the classification model, e.g. the confidence level of a Top 5 classification can be returned. 
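A minimal sketch of how these fields are typically read, assuming a hypothetical PaddleClas-format model directory and test image (the paths below are placeholders, not files shipped with FastDeploy):

```python
import cv2
import fastdeploy as fd

# Hypothetical PaddleClas-format model directory and test image.
model = fd.vision.classification.PaddleClasModel(
    "mobilenetv1/inference.pdmodel", "mobilenetv1/inference.pdiparams",
    "mobilenetv1/inference_cls.yaml")
result = model.predict(cv2.imread("test.jpeg"), topk=5)  # ClassifyResult
print(result.label_ids)  # top-5 class ids, highest confidence first
print(result.scores)     # confidence for each id above
```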
+ ## SegmentationResult The code of SegmentationResult is defined in `fastdeploy/vision/common/result.h` and is used to indicate the segmentation category predicted for each pixel in the image and the probability of the segmentation category. @@ -33,6 +34,7 @@ API: `fastdeploy.vision.Mask`, The Mask will return: - **data**:Member variable indicating a detected mask. - **shape**:Member variable representing the shape of the mask, e.g. `(H,W)`. + ## FaceDetectionResult The FaceDetectionResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the target frames detected by face detection, face landmarks, target confidence and the number of landmarks per face. @@ -42,6 +44,7 @@ API: `fastdeploy.vision.FaceDetectionResult`, The FaceDetectionResult will retur - **landmarks**(list of list(float)): Member variables that represent the key points of all faces detected by a single image. - **landmarks_per_face**(int):Member variable indicating the number of key points in each face frame. + ## KeyPointDetectionResult The KeyPointDetectionResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the coordinates and confidence of each keypoint of the target behavior in the image. @@ -55,12 +58,14 @@ API:`fastdeploy.vision.KeyPointDetectionResult`, The KeyPointDetectionResult wil - `J`: num_joints(number of keypoints for a target) - **num_joints**(int): Member variable, representing the number of keypoints for a target + ## FaceRecognitionResult The FaceRecognitionResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the embedding of the image features by the face recognition model. API: `fastdeploy.vision.FaceRecognitionResult`, The FaceRecognitionResult will return: - **landmarks_per_face**(list of float):Member variables, which indicate the final extracted features embedding of the face recognition model, can be used to calculate the feature similarity between faces. + ## MattingResult The MattingResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the value of alpha transparency predicted by the model, the predicted outlook, etc. @@ -70,6 +75,7 @@ API:`fastdeploy.vision.MattingResult`, The MattingResult will return: - **contain_foreground**(bool):Indicates whether the predicted outcome includes the foreground. - **shape**(list of int): When `contain_foreground` is false, the shape only contains `(H,W)`, when `contain_foreground` is `true,` the shape contains `(H,W,C)`, C is generally 3. + ## OCRResult The OCRResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the text box detected in the image, the text box orientation classification, and the text content recognized inside the text box. @@ -79,3 +85,17 @@ API:`fastdeploy.vision.OCRResult`, The OCRResult will return: - **rec_scores**(list of float):Member variable indicating the confidence level of the text identified in the box, the number of elements is the same as `boxes.size()`. - **cls_scores**(list of float):Member variable indicating the confidence level of the classification result of the text box, with the same number of elements as `boxes.size()`. - **cls_labels**(list of int):Member variable indicating the orientation category of the text box, the number of elements is the same as `boxes.size()`. + + +## FaceAlignmentResult +The code of FaceAlignmentResult is defined in `fastdeploy/vision/common/result.h` and is used to indicate the key points of the face. 
+
+API: `fastdeploy.vision.FaceAlignmentResult`, The FaceAlignmentResult will return:
+- **landmarks**(list of list(float)):Member variables that represent all the key points detected from a single face image.
+
+
+## HeadPoseResult
+The code of HeadPoseResult is defined in `fastdeploy/vision/common/result.h` and is used to indicate the head pose result.
+
+API: `fastdeploy.vision.HeadPoseResult`, The HeadPoseResult will return:
+- **euler_angles**(list of float):Member variables that represent the Euler angles predicted from a single face image, stored in the order (yaw, pitch, roll). Yaw is the horizontal rotation angle, pitch is the vertical rotation angle, roll is the in-plane rotation angle, and each angle lies in the range [-90, +90] degrees.
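A minimal sketch of reading both result types, assuming hypothetical PFLD and FSANet ONNX files exported for FastDeploy (the file names below are placeholders):

```python
import cv2
import fastdeploy as fd

im = cv2.imread("face.jpg")  # a cropped face image

# FaceAlignmentResult: landmarks is a list of [x, y] keypoints.
align_model = fd.vision.facealign.PFLD("pfld-106-lite.onnx")
face = align_model.predict(im)
print(len(face.landmarks))  # e.g. 106 points for a PFLD-106 model

# HeadPoseResult: euler_angles is stored as (yaw, pitch, roll), in degrees.
pose_model = fd.vision.headpose.FSANet("fsanet-var.onnx")
pose = pose_model.predict(im)
yaw, pitch, roll = pose.euler_angles
```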
From 36889ff829e5540fe1fa5f21387905a337255c32 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Fri, 4 Nov 2022 07:06:07 +0000
Subject: [PATCH 05/50] fixed for CI

---
 examples/CMakeLists.txt                            | 2 +-
 examples/vision/facealign/pfld/cpp/CMakeLists.txt  | 2 +-
 examples/vision/headpose/fsanet/cpp/CMakeLists.txt | 2 +-
 examples/vision/headpose/fsanet/cpp/infer.cc       | 6 +++---
 fastdeploy/vision/matting/contrib/rvm.cc           | 3 ++-
 5 files changed, 8 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 examples/vision/headpose/fsanet/cpp/CMakeLists.txt
 mode change 100644 => 100755 fastdeploy/vision/matting/contrib/rvm.cc

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 7118460ea6e..8aa469b6a5b 100755
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -49,7 +49,7 @@ function(add_fastdeploy_executable FIELD CC_FILE)
   add_executable(${TEMP_TARGET_NAME} ${TEMP_TARGET_FILE})
   target_link_libraries(${TEMP_TARGET_NAME} PUBLIC fastdeploy)
   if(TARGET gflags)
-    if(UNIX)
+    if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
       target_link_libraries(${TEMP_TARGET_NAME} PRIVATE gflags pthread)
     else()
       target_link_libraries(${TEMP_TARGET_NAME} PRIVATE gflags)
diff --git a/examples/vision/facealign/pfld/cpp/CMakeLists.txt b/examples/vision/facealign/pfld/cpp/CMakeLists.txt
index be329f69ac0..c417fcb3880 100755
--- a/examples/vision/facealign/pfld/cpp/CMakeLists.txt
+++ b/examples/vision/facealign/pfld/cpp/CMakeLists.txt
@@ -11,7 +11,7 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
 # 添加FastDeploy库依赖
-if(UNIX)
+if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags)
diff --git a/examples/vision/headpose/fsanet/cpp/CMakeLists.txt b/examples/vision/headpose/fsanet/cpp/CMakeLists.txt
old mode 100644
new mode 100755
index be329f69ac0..c417fcb3880
--- a/examples/vision/headpose/fsanet/cpp/CMakeLists.txt
+++ b/examples/vision/headpose/fsanet/cpp/CMakeLists.txt
@@ -11,7 +11,7 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
 # 添加FastDeploy库依赖
-if(UNIX)
+if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags)
diff --git a/examples/vision/headpose/fsanet/cpp/infer.cc b/examples/vision/headpose/fsanet/cpp/infer.cc
index 332f492606b..522ec3d9540 100644
--- a/examples/vision/headpose/fsanet/cpp/infer.cc
+++ b/examples/vision/headpose/fsanet/cpp/infer.cc
@@ -44,7 +44,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     } else if (FLAGS_backend == "trt" ||
                FLAGS_backend == "paddle_trt") {
       option->UseTrtBackend();
-      option.SetTrtInputShape("images", {1, 3, 64, 64});
+      option->SetTrtInputShape("images", {1, 3, 64, 64});
       if (FLAGS_backend == "paddle_trt") {
         option->EnablePaddleToTrt();
       }
@@ -54,7 +54,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     } else if (FLAGS_backend == "default") {
       return true;
     } else {
-      std::cout << "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, " << FLAG_backend << " is not supported." << std::endl;
+      std::cout << "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, " << FLAGS_backend << " is not supported." << std::endl;
       return false;
     }
   } else if (FLAGS_device == "cpu") {
@@ -67,7 +67,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     } else if (FLAGS_backend == "default") {
       return true;
     } else {
-      std::cout << "While inference with CPU, only support default/ort/ov/paddle now, " << FLAG_backend << " is not supported." << std::endl;
+      std::cout << "While inference with CPU, only support default/ort/ov/paddle now, " << FLAGS_backend << " is not supported." << std::endl;
       return false;
     }
   } else {
diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc
old mode 100644
new mode 100755
index ec8ed19fc47..6f48a38652a
--- a/fastdeploy/vision/matting/contrib/rvm.cc
+++ b/fastdeploy/vision/matting/contrib/rvm.cc
@@ -138,7 +138,8 @@ bool RobustVideoMatting::Postprocess(
   result->Clear();
   result->contain_foreground = true;
-  result->shape = {static_cast<int64_t>(in_h), static_cast<int64_t>(in_w)};
+  // if contain_foreground == true, shape must be set to (h, w, c)
+  result->shape = {static_cast<int64_t>(in_h), static_cast<int64_t>(in_w), 3};
   int numel = in_h * in_w;
   int nbytes = numel * sizeof(float);
   result->Resize(numel);

From f32a09ad2b82732c9900772ae66e469b82149178 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Mon, 7 Nov 2022 06:19:42 +0000
Subject: [PATCH 06/50] update python api_docs

---
 docs/api_docs/python/face_alignment.md | 9 +++++++++
 docs/api_docs/python/headpose.md       | 9 +++++++++
 docs/api_docs/python/matting.md        | 8 ++++++++
 3 files changed, 26 insertions(+)
 create mode 100644 docs/api_docs/python/face_alignment.md
 create mode 100644 docs/api_docs/python/headpose.md

diff --git a/docs/api_docs/python/face_alignment.md b/docs/api_docs/python/face_alignment.md
new file mode 100644
index 00000000000..f0369b55af6
--- /dev/null
+++ b/docs/api_docs/python/face_alignment.md
@@ -0,0 +1,9 @@
+# Face Alignment API
+
+## fastdeploy.vision.facealign.PFLD
+
+```{eval-rst}
+.. autoclass:: fastdeploy.vision.facealign.PFLD
+    :members:
+    :inherited-members:
+```
diff --git a/docs/api_docs/python/headpose.md b/docs/api_docs/python/headpose.md
new file mode 100644
index 00000000000..d1fba74f927
--- /dev/null
+++ b/docs/api_docs/python/headpose.md
@@ -0,0 +1,9 @@
+# Headpose API
+
+## fastdeploy.vision.headpose.FSANet
+
+```{eval-rst}
+.. autoclass:: fastdeploy.vision.headpose.FSANet
+    :members:
+    :inherited-members:
+```
diff --git a/docs/api_docs/python/matting.md b/docs/api_docs/python/matting.md
index 7c121110acb..5e9c2a22732 100644
--- a/docs/api_docs/python/matting.md
+++ b/docs/api_docs/python/matting.md
@@ -15,3 +15,11 @@
     :members:
     :inherited-members:
 ```
+
+## fastdeploy.vision.matting.RobustVideoMatting
+
+```{eval-rst}
+.. 
autoclass:: fastdeploy.vision.matting.RobustVideoMatting + :members: + :inherited-members: +``` From 7fa4dea9ee03c1058c09f81251b37dc5b8673bee Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 7 Nov 2022 06:26:57 +0000 Subject: [PATCH 07/50] update index.rst --- docs/api_docs/python/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/api_docs/python/index.rst b/docs/api_docs/python/index.rst index 06d4a95cbbe..60eea324e7f 100644 --- a/docs/api_docs/python/index.rst +++ b/docs/api_docs/python/index.rst @@ -20,4 +20,6 @@ FastDeploy matting.md face_recognition.md face_detection.md + face_alignment.md + headpose.md vision_results_en.md From 438906024077cfd30c8ecbf2509ef6bbd7facdd5 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 7 Nov 2022 09:15:03 +0000 Subject: [PATCH 08/50] add runtime cpp examples --- docs/api_docs/python/index.rst | 2 + docs/api_docs/python/runtime.md | 9 +++ docs/api_docs/python/runtime_option.md | 9 +++ examples/runtime/cpp/CMakeLists.txt | 14 ++++ examples/runtime/cpp/infer_onnx_openvino.cc | 59 +++++++++++++++++ examples/runtime/cpp/infer_onnx_tensorrt.cc | 60 +++++++++++++++++ .../runtime/cpp/infer_paddle_onnxruntime.cc | 60 +++++++++++++++++ examples/runtime/cpp/infer_paddle_openvino.cc | 60 +++++++++++++++++ .../cpp/infer_paddle_paddle_inference.cc | 65 +++++++++++++++++++ examples/runtime/cpp/infer_paddle_tensorrt.cc | 61 +++++++++++++++++ .../runtime/python/infer_paddle_tensorrt.py | 1 + 11 files changed, 400 insertions(+) create mode 100644 docs/api_docs/python/runtime.md create mode 100644 docs/api_docs/python/runtime_option.md create mode 100644 examples/runtime/cpp/CMakeLists.txt create mode 100644 examples/runtime/cpp/infer_onnx_openvino.cc create mode 100644 examples/runtime/cpp/infer_onnx_tensorrt.cc create mode 100644 examples/runtime/cpp/infer_paddle_onnxruntime.cc create mode 100644 examples/runtime/cpp/infer_paddle_openvino.cc create mode 100644 examples/runtime/cpp/infer_paddle_paddle_inference.cc create mode 100644 examples/runtime/cpp/infer_paddle_tensorrt.cc diff --git a/docs/api_docs/python/index.rst b/docs/api_docs/python/index.rst index 60eea324e7f..69b65b3b101 100644 --- a/docs/api_docs/python/index.rst +++ b/docs/api_docs/python/index.rst @@ -23,3 +23,5 @@ FastDeploy face_alignment.md headpose.md vision_results_en.md + runtime.md + runtime_option.md diff --git a/docs/api_docs/python/runtime.md b/docs/api_docs/python/runtime.md new file mode 100644 index 00000000000..4a519ee7ee1 --- /dev/null +++ b/docs/api_docs/python/runtime.md @@ -0,0 +1,9 @@ +# Runtime API + +## fastdeploy.Runtime + +```{eval-rst} +.. autoclass:: fastdeploy.Runtime + :members: + :inherited-members: +``` diff --git a/docs/api_docs/python/runtime_option.md b/docs/api_docs/python/runtime_option.md new file mode 100644 index 00000000000..96eff8672d4 --- /dev/null +++ b/docs/api_docs/python/runtime_option.md @@ -0,0 +1,9 @@ +# Runtime Option API + +## fastdeploy.RuntimeOption + +```{eval-rst} +.. 
autoclass:: fastdeploy.RuntimeOption + :members: + :inherited-members: +``` diff --git a/examples/runtime/cpp/CMakeLists.txt b/examples/runtime/cpp/CMakeLists.txt new file mode 100644 index 00000000000..09ea45c3b89 --- /dev/null +++ b/examples/runtime/cpp/CMakeLists.txt @@ -0,0 +1,14 @@ +PROJECT(runtime_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.12) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc) +# 添加FastDeploy库依赖 +target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/runtime/cpp/infer_onnx_openvino.cc b/examples/runtime/cpp/infer_onnx_openvino.cc new file mode 100644 index 00000000000..4588ec2de6b --- /dev/null +++ b/examples/runtime/cpp/infer_onnx_openvino.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2.onnx"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); + runtime_option.UseOpenVINOBackend(); + runtime_option.SetCpuThreadNum(12); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_onnx_tensorrt.cc b/examples/runtime/cpp/infer_onnx_tensorrt.cc new file mode 100644 index 00000000000..26858db6631 --- /dev/null +++ b/examples/runtime/cpp/infer_onnx_tensorrt.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2.onnx"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); + runtime_option.UseGpu(0); + runtime_option.UseTrtBackend(); + runtime_option.SetTrtInputShape("inputs", {1, 3, 224, 224}); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_paddle_onnxruntime.cc b/examples/runtime/cpp/infer_paddle_onnxruntime.cc new file mode 100644 index 00000000000..d9ed0819c1c --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_onnxruntime.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.UseOrtBackend(); + runtime_option.SetCpuThreadNum(12); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! 
" + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_paddle_openvino.cc b/examples/runtime/cpp/infer_paddle_openvino.cc new file mode 100644 index 00000000000..3862437d049 --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_openvino.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.UseOpenVINOBackend(); + runtime_option.SetCpuThreadNum(12); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_paddle_paddle_inference.cc b/examples/runtime/cpp/infer_paddle_paddle_inference.cc new file mode 100644 index 00000000000..ac34f9bf8e3 --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_paddle_inference.cc @@ -0,0 +1,65 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + // CPU + runtime_option.UsePaddleBackend(); + runtime_option.SetCpuThreadNum(12); + // GPU + // runtime_option.UseGpu(0); + // IPU + // runtime_option.UseIpu(); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_paddle_tensorrt.cc b/examples/runtime/cpp/infer_paddle_tensorrt.cc new file mode 100644 index 00000000000..7189ee87d0d --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_tensorrt.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.UseGpu(0); + runtime_option.UseTrtBackend(); + runtime_option.EnablePaddleToTrt(); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! 
" + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/python/infer_paddle_tensorrt.py b/examples/runtime/python/infer_paddle_tensorrt.py index ad2b8e1976a..8388656c79f 100644 --- a/examples/runtime/python/infer_paddle_tensorrt.py +++ b/examples/runtime/python/infer_paddle_tensorrt.py @@ -27,6 +27,7 @@ # **** GPU 配置 *** option.use_gpu(0) option.use_trt_backend() +option.enable_paddle_to_trt() # 初始化构造runtime runtime = fd.Runtime(option) From 84d564fa7c2463c6641cd2e09288605be0f652b7 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 7 Nov 2022 11:03:17 +0000 Subject: [PATCH 09/50] deal with comments --- examples/runtime/cpp/infer_onnx_openvino.cc | 2 +- examples/runtime/cpp/infer_onnx_tensorrt.cc | 2 +- examples/runtime/cpp/infer_paddle_onnxruntime.cc | 2 +- examples/runtime/cpp/infer_paddle_openvino.cc | 2 +- examples/runtime/cpp/infer_paddle_paddle_inference.cc | 2 +- examples/runtime/cpp/infer_paddle_tensorrt.cc | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/runtime/cpp/infer_onnx_openvino.cc b/examples/runtime/cpp/infer_onnx_openvino.cc index 4588ec2de6b..c2f270be9f3 100644 --- a/examples/runtime/cpp/infer_onnx_openvino.cc +++ b/examples/runtime/cpp/infer_onnx_openvino.cc @@ -43,7 +43,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_onnx_tensorrt.cc b/examples/runtime/cpp/infer_onnx_tensorrt.cc index 26858db6631..084c1dfae6d 100644 --- a/examples/runtime/cpp/infer_onnx_tensorrt.cc +++ b/examples/runtime/cpp/infer_onnx_tensorrt.cc @@ -44,7 +44,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_paddle_onnxruntime.cc b/examples/runtime/cpp/infer_paddle_onnxruntime.cc index d9ed0819c1c..d8d036a0346 100644 --- a/examples/runtime/cpp/infer_paddle_onnxruntime.cc +++ b/examples/runtime/cpp/infer_paddle_onnxruntime.cc @@ -44,7 +44,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_paddle_openvino.cc b/examples/runtime/cpp/infer_paddle_openvino.cc index 3862437d049..3958cdcf0e6 100644 --- 
a/examples/runtime/cpp/infer_paddle_openvino.cc +++ b/examples/runtime/cpp/infer_paddle_openvino.cc @@ -44,7 +44,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_paddle_paddle_inference.cc b/examples/runtime/cpp/infer_paddle_paddle_inference.cc index ac34f9bf8e3..1d0bd82ad2f 100644 --- a/examples/runtime/cpp/infer_paddle_paddle_inference.cc +++ b/examples/runtime/cpp/infer_paddle_paddle_inference.cc @@ -49,7 +49,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_paddle_tensorrt.cc b/examples/runtime/cpp/infer_paddle_tensorrt.cc index 7189ee87d0d..04fe311b2c7 100644 --- a/examples/runtime/cpp/infer_paddle_tensorrt.cc +++ b/examples/runtime/cpp/infer_paddle_tensorrt.cc @@ -45,7 +45,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } From e6f4e63acbcc51c53b4f29858d09dc045f2f56d2 Mon Sep 17 00:00:00 2001 From: Jason <928090362@qq.com> Date: Mon, 7 Nov 2022 19:48:58 +0800 Subject: [PATCH 10/50] Update infer_paddle_tensorrt.py --- examples/runtime/python/infer_paddle_tensorrt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/runtime/python/infer_paddle_tensorrt.py b/examples/runtime/python/infer_paddle_tensorrt.py index 8388656c79f..94c95cb872d 100644 --- a/examples/runtime/python/infer_paddle_tensorrt.py +++ b/examples/runtime/python/infer_paddle_tensorrt.py @@ -27,7 +27,8 @@ # **** GPU 配置 *** option.use_gpu(0) option.use_trt_backend() -option.enable_paddle_to_trt() +# using TensorRT integrated in Paddle Inference +# option.enable_paddle_to_trt() # 初始化构造runtime runtime = fd.Runtime(option) From 1952c99bfed66e0aa29e02c4f9f27f9024021985 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Tue, 8 Nov 2022 07:21:16 +0000 Subject: [PATCH 11/50] Add runtime quick start --- docs/cn/quick_start/runtime/cpp.md | 116 ++++++++++++++++++++++++++ docs/cn/quick_start/runtime/python.md | 51 +++++++++++ 2 files changed, 167 insertions(+) diff --git a/docs/cn/quick_start/runtime/cpp.md b/docs/cn/quick_start/runtime/cpp.md index 7d52d9b58be..a4ed9485092 100644 --- a/docs/cn/quick_start/runtime/cpp.md +++ b/docs/cn/quick_start/runtime/cpp.md @@ -1 +1,117 @@ # C++推理 + +确认开发环境已准备FastDeploy C++部署库,参考[FastDeploy安装](../../build_and_install/)安装预编译的FastDeploy,或根据自己需求进行编译安装。 + +本文档以 PaddleClas 分类模型 MobileNetV2 为例展示CPU上的推理示例 + +## 1. 获取模型 + +```bash +wget https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz +tar xvf mobilenetv2.tgz +``` + +## 2. 
配置后端
+
+如下C++代码保存为`infer_paddle_onnxruntime.cc`
+
+``` c++
+#include "fastdeploy/runtime.h"
+
+namespace fd = fastdeploy;
+
+int main(int argc, char* argv[]) {
+  std::string model_file = "mobilenetv2/inference.pdmodel";
+  std::string params_file = "mobilenetv2/inference.pdiparams";
+
+  // setup option
+  fd::RuntimeOption runtime_option;
+  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
+  runtime_option.UseOrtBackend();
+  runtime_option.SetCpuThreadNum(12);
+  // init runtime
+  std::unique_ptr<fd::Runtime> runtime =
+      std::unique_ptr<fd::Runtime>(new fd::Runtime());
+  if (!runtime->Init(runtime_option)) {
+    std::cerr << "--- Init FastDeploy Runtime Failed! "
+              << "\n--- Model: " << model_file << std::endl;
+    return -1;
+  } else {
+    std::cout << "--- Init FastDeploy Runtime Done! "
+              << "\n--- Model: " << model_file << std::endl;
+  }
+  // init input tensor shape
+  fd::TensorInfo info = runtime->GetInputInfo(0);
+  info.shape = {1, 3, 224, 224};
+
+  std::vector<fd::FDTensor> input_tensors(1);
+  std::vector<fd::FDTensor> output_tensors(1);
+
+  std::vector<float> inputs_data;
+  inputs_data.resize(1 * 3 * 224 * 224);
+  for (size_t i = 0; i < inputs_data.size(); ++i) {
+    inputs_data[i] = std::rand() % 1000 / 1000.0f;
+  }
+  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
+
+  //get input name
+  input_tensors[0].name = info.name;
+
+  runtime->Infer(input_tensors, &output_tensors);
+
+  output_tensors[0].PrintInfo();
+  return 0;
+}
+```
+加载完成,会输出提示如下,说明初始化的后端,以及运行的硬件设备
+```
+[INFO] fastdeploy/fastdeploy_runtime.cc(283)::Init	Runtime initialized with Backend::OrtBackend in device Device::CPU.
+```
+
+## 3. 准备CMakeLists.txt
+
+FastDeploy中包含多个依赖库,直接采用`g++`或编译器编译较为繁杂,推荐使用cmake进行编译配置。示例配置如下,
+
+```cmake
+PROJECT(runtime_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.12)
+
+# 指定下载解压后的fastdeploy库路径
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# 添加FastDeploy依赖头文件
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_paddle_onnxruntime.cc)
+# 添加FastDeploy库依赖
+target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS})
+```
+
+## 4. 编译可执行程序
+
+打开命令行终端,进入`infer_paddle_onnxruntime.cc`和`CMakeLists.txt`所在的目录,执行如下命令
+
+```bash
+cd examples/runtime/cpp
+mkdir build && cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=$fastdeploy_cpp_sdk
+make -j
+```
+
+```fastdeploy_cpp_sdk``` 为FastDeploy C++部署库路径
+
+编译完成后,使用如下命令执行可得到预测结果
+```bash
+./runtime_demo
+```
+执行时如提示`error while loading shared libraries: libxxx.so: cannot open shared object file: No such file...`,说明程序执行时没有找到FastDeploy的库路径,可通过执行如下命令,将FastDeploy的库路径添加到环境变量之后,重新执行二进制程序。
+```bash
+source /Path/to/fastdeploy_cpp_sdk/fastdeploy_init.sh
+```
+
+## 其它文档
+
+- [不同后端Runtime demo示例](../../../../examples/runtime/README.md)
+- [切换模型推理的硬件和后端](../../faq/how_to_change_backend.md)
diff --git a/docs/cn/quick_start/runtime/python.md b/docs/cn/quick_start/runtime/python.md
index cb2c6efd227..23e78956fb2 100644
--- a/docs/cn/quick_start/runtime/python.md
+++ b/docs/cn/quick_start/runtime/python.md
@@ -1 +1,52 @@
 # Python推理
+
+确认开发环境已安装FastDeploy,参考[FastDeploy安装](../../build_and_install/)安装预编译的FastDeploy,或根据自己需求进行编译安装。
+
+本文档以 PaddleClas 分类模型 MobileNetV2 为例展示CPU上的推理示例
+
+## 1. 获取模型
+
+``` python
+import fastdeploy as fd
+
+model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz"
+fd.download_and_decompress(model_url, path=".")
+```
+
+## 2. 配置后端
+
+- 更多后端的示例可参考[examples/runtime](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/runtime)
+
+``` python
+import numpy as np
+
+option = fd.RuntimeOption()
+
+option.set_model_path("mobilenetv2/inference.pdmodel",
+                      "mobilenetv2/inference.pdiparams")
+
+# **** CPU 配置 ****
+option.use_cpu()
+option.use_ort_backend()
+option.set_cpu_thread_num(12)
+
+# 初始化构造runtime
+runtime = fd.Runtime(option)
+
+# 获取模型输入名
+input_name = runtime.get_input_info(0).name
+
+# 构造随机数据进行推理
+results = runtime.infer({
+    input_name: np.random.rand(1, 3, 224, 224).astype("float32")
+})
+
+print(results[0].shape)
+```
+加载完成,会输出提示如下,说明初始化的后端,以及运行的硬件设备
+```
+[INFO] fastdeploy/fastdeploy_runtime.cc(283)::Init	Runtime initialized with Backend::OrtBackend in device Device::CPU.
+```
+
+## 其它文档
+
+- [不同后端Runtime demo示例](../../../../examples/runtime/README.md)
+- [切换模型推理的硬件和后端](../../faq/how_to_change_backend.md)

From d495b8e8081824e5606e1d255b80192abb2b4165 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Tue, 8 Nov 2022 11:09:11 +0000
Subject: [PATCH 12/50] deal with comments

---
 docs/cn/quick_start/runtime/cpp.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/cn/quick_start/runtime/cpp.md b/docs/cn/quick_start/runtime/cpp.md
index a4ed9485092..5fe86c7b6a5 100644
--- a/docs/cn/quick_start/runtime/cpp.md
+++ b/docs/cn/quick_start/runtime/cpp.md
@@ -111,6 +111,8 @@ make -j
 source /Path/to/fastdeploy_cpp_sdk/fastdeploy_init.sh
 ```
 
+本示例代码在各平台(Windows/Linux/Mac)上通用,但编译过程仅支持(Linux/Mac),Windows上使用msbuild进行编译,具体使用方式参考[Windows平台使用FastDeploy C++ SDK](../../faq/use_sdk_on_windows.md)
+
 ## 其它文档
 
 - [不同后端Runtime demo示例](../../../../examples/runtime/README.md)

From 731b8220650eb47d40d5b87d5c26d7bc57a6f83e Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Tue, 8 Nov 2022 12:47:09 +0000
Subject: [PATCH 13/50] fixed reused_input_tensors&&reused_output_tensors

---
 examples/runtime/README.md                         |  14 ++
 examples/runtime/cpp/README.md                     | 121 ++++++++++++++++++
 examples/runtime/python/README.md                  |  53 ++++++++
 .../vision/classification/yolov5cls/README.md      |   2 -
 fastdeploy/fastdeploy_model.cc                     |   2 +-
 fastdeploy/fastdeploy_model.h                      |  19 ++-
 .../vision/classification/ppcls/model.cc           |   8 +-
 .../vision/detection/contrib/scaledyolov4.cc       |   8 +-
 fastdeploy/vision/detection/contrib/yolor.cc       |   8 +-
 fastdeploy/vision/detection/contrib/yolov5.cc      |  10 +-
 .../vision/detection/contrib/yolov5lite.cc         |  12 +-
 fastdeploy/vision/detection/contrib/yolov6.cc      |  10 +-
 fastdeploy/vision/detection/contrib/yolov7.cc      |  10 +-
 .../detection/contrib/yolov7end2end_ort.cc         |   8 +-
 .../detection/contrib/yolov7end2end_trt.cc         |  10 +-
 fastdeploy/vision/detection/contrib/yolox.cc       |  10 +-
 fastdeploy/vision/detection/ppdet/ppyoloe.cc       |   6 +-
 17 files changed, 247 insertions(+), 64 deletions(-)
 mode change 100644 => 100755 examples/runtime/README.md
 create mode 100644 examples/runtime/cpp/README.md
 create mode 100644 examples/runtime/python/README.md
 mode change 100644 => 100755 fastdeploy/fastdeploy_model.cc
 mode change 100644 => 100755 fastdeploy/fastdeploy_model.h
 mode change 100644 => 100755 fastdeploy/vision/classification/ppcls/model.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/scaledyolov4.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolor.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov5.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov5lite.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov6.cc
 mode change 100644 => 100755 
fastdeploy/vision/detection/contrib/yolov7.cc mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolox.cc mode change 100644 => 100755 fastdeploy/vision/detection/ppdet/ppyoloe.cc diff --git a/examples/runtime/README.md b/examples/runtime/README.md old mode 100644 new mode 100755 index 18651bd6995..b434bc99eb0 --- a/examples/runtime/README.md +++ b/examples/runtime/README.md @@ -1,5 +1,9 @@ # FastDeploy Runtime examples +FastDeploy Runtime C++ 推理示例如下 + +## Python 示例 + | Example Code | Program Language | Description | | :------- | :------- | :---- | | python/infer_paddle_paddle_inference.py | Python | Deploy Paddle model with Paddle Inference(CPU/GPU) | @@ -8,9 +12,19 @@ | python/infer_paddle_onnxruntime.py | Python | Deploy Paddle model with ONNX Runtime(CPU/GPU) | | python/infer_onnx_openvino.py | Python | Deploy ONNX model with OpenVINO(CPU) | | python/infer_onnx_tensorrt.py | Python | Deploy ONNX model with TensorRT(GPU) | + +## C++ 示例 + +| Example Code | Program Language | Description | +| :------- | :------- | :---- | | cpp/infer_paddle_paddle_inference.cc | C++ | Deploy Paddle model with Paddle Inference(CPU/GPU) | | cpp/infer_paddle_tensorrt.cc | C++ | Deploy Paddle model with TensorRT(GPU) | | cpp/infer_paddle_openvino.cc | C++ | Deploy Paddle model with OpenVINO(CPU | | cpp/infer_paddle_onnxruntime.cc | C++ | Deploy Paddle model with ONNX Runtime(CPU/GPU) | | cpp/infer_onnx_openvino.cc | C++ | Deploy ONNX model with OpenVINO(CPU) | | cpp/infer_onnx_tensorrt.cc | C++ | Deploy ONNX model with TensorRT(GPU) | + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) diff --git a/examples/runtime/cpp/README.md b/examples/runtime/cpp/README.md new file mode 100644 index 00000000000..9de8b1d6271 --- /dev/null +++ b/examples/runtime/cpp/README.md @@ -0,0 +1,121 @@ +# C++推理 + +在运行demo前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本文档以 PaddleClas 分类模型 MobileNetV2 为例展示CPU上的推理示例 + +## 1. 获取模型 + +```bash +wget https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz +tar xvf mobilenetv2.tgz +``` + +## 2. 配置后端 + +如下C++代码保存为`infer_paddle_onnxruntime.cc` + +``` c++ +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.UseOrtBackend(); + runtime_option.SetCpuThreadNum(12); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! 
" + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.resize(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} +``` +加载完成,会输出提示如下,说明初始化的后端,以及运行的硬件设备 +``` +[INFO] fastdeploy/fastdeploy_runtime.cc(283)::Init Runtime initialized with Backend::OrtBackend in device Device::CPU. +``` + +## 3. 准备CMakeLists.txt + +FastDeploy中包含多个依赖库,直接采用`g++`或编译器编译较为繁杂,推荐使用cmake进行编译配置。示例配置如下, + +```cmake +PROJECT(runtime_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.12) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc) +# 添加FastDeploy库依赖 +target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS}) +``` + +## 4. 编译可执行程序 + +打开命令行终端,进入`infer_paddle_onnxruntime.cc`和`CMakeLists.txt`所在的目录,执行如下命令 + +```bash +mkdir build & cd build +cmake .. -DFASTDEPLOY_INSTALL_DIR=$fastdeploy_cpp_sdk +make -j +``` + +```fastdeploy_cpp_sdk``` 为FastDeploy C++部署库路径 + +编译完成后,使用如下命令执行可得到预测结果 +```bash +./runtime_demo +``` +执行时如提示`error while loading shared libraries: libxxx.so: cannot open shared object file: No such file...`,说明程序执行时没有找到FastDeploy的库路径,可通过执行如下命令,将FastDeploy的库路径添加到环境变量之后,重新执行二进制程序。 +```bash +source /Path/to/fastdeploy_cpp_sdk/fastdeploy_init.sh +``` + +本示例代码在各平台(Windows/Linux/Mac)上通用,但编译过程仅支持(Linux/Mac),Windows上使用msbuild进行编译,具体使用方式参考[Windows平台使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## 其它文档 + +- [Runtime Python 示例](../python) +- [切换模型推理的硬件和后端](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/runtime/python/README.md b/examples/runtime/python/README.md new file mode 100644 index 00000000000..c9692fca6b1 --- /dev/null +++ b/examples/runtime/python/README.md @@ -0,0 +1,53 @@ +# Python推理 + +在运行demo前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本文档以 PaddleClas 分类模型 MobileNetV2 为例展示 CPU 上的推理示例 + +## 1. 获取模型 + +``` python +import fastdeploy as fd + +model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz" +fd.download_and_decompress(model_url, path=".") +``` + +## 2. 
+
+``` python
+import numpy as np
+
+option = fd.RuntimeOption()
+
+option.set_model_path("mobilenetv2/inference.pdmodel",
+                      "mobilenetv2/inference.pdiparams")
+
+# **** CPU 配置 ****
+option.use_cpu()
+option.use_ort_backend()
+option.set_cpu_thread_num(12)
+
+# 初始化构造runtime
+runtime = fd.Runtime(option)
+
+# 获取模型输入名
+input_name = runtime.get_input_info(0).name
+
+# 构造随机数据进行推理
+results = runtime.infer({
+    input_name: np.random.rand(1, 3, 224, 224).astype("float32")
+})
+
+print(results[0].shape)
+```
+加载完成,会输出提示如下,说明初始化的后端,以及运行的硬件设备
+```
+[INFO] fastdeploy/fastdeploy_runtime.cc(283)::Init	Runtime initialized with Backend::OrtBackend in device Device::CPU.
+```
+
+## 其它文档
+
+- [Runtime C++ 示例](../cpp)
+- [切换模型推理的硬件和后端](../../../docs/cn/faq/how_to_change_backend.md)
diff --git a/examples/vision/classification/yolov5cls/README.md b/examples/vision/classification/yolov5cls/README.md
index 9ed02b7286f..468c9d963b1 100644
--- a/examples/vision/classification/yolov5cls/README.md
+++ b/examples/vision/classification/yolov5cls/README.md
@@ -17,8 +17,6 @@
 | [YOLOv5x-cls](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5x-cls.onnx) | 184MB | 79.0% | 94.4% |
 
 
-
-
 ## 详细部署文档
 
 - [Python部署](python)
diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc
old mode 100644
new mode 100755
index ce18b7eb9be..828bc4878da
--- a/fastdeploy/fastdeploy_model.cc
+++ b/fastdeploy/fastdeploy_model.cc
@@ -239,7 +239,7 @@ bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
 }
 
 bool FastDeployModel::Infer() {
-  return Infer(reused_input_tensors, &reused_output_tensors);
+  return Infer(reused_input_tensors_, &reused_output_tensors_);
 }
 
 std::map<std::string, float> FastDeployModel::PrintStatisInfoOfRuntime() {
diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h
old mode 100644
new mode 100755
index 5a6cd3736db..75e67578e5a
--- a/fastdeploy/fastdeploy_model.h
+++ b/fastdeploy/fastdeploy_model.h
@@ -28,7 +28,7 @@ class FASTDEPLOY_DECL FastDeployModel {
   virtual bool Infer(std::vector<FDTensor>& input_tensors,
                      std::vector<FDTensor>* output_tensors);
 
-  /** \brief Inference the model by the runtime. This interface is using class member reused_input_tensors to do inference and writing results to reused_output_tensors
+  /** \brief Inference the model by the runtime. 
This interface is using class member reused_input_tensors_ to do inference and writing results to reused_output_tensors_ */ virtual bool Infer(); @@ -107,17 +107,10 @@ class FASTDEPLOY_DECL FastDeployModel { /** \brief Release reused input/output buffers */ virtual void ReleaseReusedBuffer() { - std::vector().swap(reused_input_tensors); - std::vector().swap(reused_output_tensors); + std::vector().swap(reused_input_tensors_); + std::vector().swap(reused_output_tensors_); } - /** \brief Reused input tensors - */ - std::vector reused_input_tensors; - /** \brief Reused output tensors - */ - std::vector reused_output_tensors; - protected: virtual bool InitRuntime(); virtual bool CreateCpuBackend(); @@ -126,7 +119,11 @@ class FASTDEPLOY_DECL FastDeployModel { virtual bool CreateRKNPUBackend(); bool initialized = false; - std::vector valid_external_backends; + std::vector valid_external_backends_; + // Reused input tensors + std::vector reused_input_tensors_; + // Reused output tensors + std::vector reused_output_tensors_; private: std::shared_ptr runtime_; diff --git a/fastdeploy/vision/classification/ppcls/model.cc b/fastdeploy/vision/classification/ppcls/model.cc old mode 100644 new mode 100755 index 5f88e0a724f..a9a8182e3ea --- a/fastdeploy/vision/classification/ppcls/model.cc +++ b/fastdeploy/vision/classification/ppcls/model.cc @@ -60,18 +60,18 @@ bool PaddleClasModel::Predict(const cv::Mat& im, ClassifyResult* result) { bool PaddleClasModel::BatchPredict(const std::vector& images, std::vector* results) { std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; - if (!Infer(reused_input_tensors, &reused_output_tensors)) { + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { FDERROR << "Failed to inference by runtime." << std::endl; return false; } - if (!postprocessor_.Run(reused_output_tensors, results)) { + if (!postprocessor_.Run(reused_output_tensors_, results)) { FDERROR << "Failed to postprocess the inference results by runtime." << std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/scaledyolov4.cc b/fastdeploy/vision/detection/contrib/scaledyolov4.cc old mode 100644 new mode 100755 index 46413438e82..694d7dd58f6 --- a/fastdeploy/vision/detection/contrib/scaledyolov4.cc +++ b/fastdeploy/vision/detection/contrib/scaledyolov4.cc @@ -84,7 +84,7 @@ bool ScaledYOLOv4::Initialize() { is_scale_up = false; stride = 32; max_wh = 7680.0; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -230,17 +230,17 @@ bool ScaledYOLOv4::Predict(cv::Mat* im, DetectionResult* result, im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." 
<< std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolor.cc b/fastdeploy/vision/detection/contrib/yolor.cc old mode 100644 new mode 100755 index 5852e72067b..31c56f57696 --- a/fastdeploy/vision/detection/contrib/yolor.cc +++ b/fastdeploy/vision/detection/contrib/yolor.cc @@ -83,7 +83,7 @@ bool YOLOR::Initialize() { is_scale_up = false; stride = 32; max_wh = 7680.0; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -227,18 +227,18 @@ bool YOLOR::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5.cc old mode 100644 new mode 100755 index 27f74fd5516..ba5b22363ec --- a/fastdeploy/vision/detection/contrib/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5.cc @@ -93,7 +93,7 @@ bool YOLOv5::Initialize() { stride_ = 32; max_wh_ = 7680.0; multi_label_ = true; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -350,14 +350,14 @@ bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, std::map> im_info; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info, size_, + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info, size_, padding_value_, is_mini_pad_, is_no_pad_, is_scale_up_, stride_, max_wh_, multi_label_)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info, size_, + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info, size_, padding_value_, is_mini_pad_, is_no_pad_, is_scale_up_, stride_, max_wh_, multi_label_)) { FDERROR << "Failed to preprocess input image." << std::endl; @@ -365,13 +365,13 @@ bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors, result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_, result, im_info, conf_threshold, nms_iou_threshold, multi_label_)) { FDERROR << "Failed to post process." 
<< std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov5lite.cc b/fastdeploy/vision/detection/contrib/yolov5lite.cc old mode 100644 new mode 100755 index 6657a2bf90a..f936b8ce50e --- a/fastdeploy/vision/detection/contrib/yolov5lite.cc +++ b/fastdeploy/vision/detection/contrib/yolov5lite.cc @@ -123,7 +123,7 @@ bool YOLOv5Lite::Initialize() { anchor_config = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0}, {116.0, 90.0, 156.0, 198.0, 373.0, 326.0}}; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -426,31 +426,31 @@ bool YOLOv5Lite::Predict(cv::Mat* im, DetectionResult* result, static_cast(mat.Width())}; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } if (is_decode_exported) { - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; } } else { - if (!PostprocessWithDecode(reused_output_tensors[0], result, im_info, + if (!PostprocessWithDecode(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov6.cc b/fastdeploy/vision/detection/contrib/yolov6.cc old mode 100644 new mode 100755 index 70f79b9f47c..9d4f94d51f6 --- a/fastdeploy/vision/detection/contrib/yolov6.cc +++ b/fastdeploy/vision/detection/contrib/yolov6.cc @@ -96,7 +96,7 @@ bool YOLOv6::Initialize() { is_scale_up = false; stride = 32; max_wh = 4096.0f; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -311,24 +311,24 @@ bool YOLOv6::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, static_cast(mat.Width())}; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." 
<< std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov7.cc b/fastdeploy/vision/detection/contrib/yolov7.cc old mode 100644 new mode 100755 index c3fc1de4148..5b4ca4d46bb --- a/fastdeploy/vision/detection/contrib/yolov7.cc +++ b/fastdeploy/vision/detection/contrib/yolov7.cc @@ -94,7 +94,7 @@ bool YOLOv7::Initialize() { is_scale_up = false; stride = 32; max_wh = 7680.0; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -313,24 +313,24 @@ bool YOLOv7::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, static_cast(mat.Width())}; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc old mode 100644 new mode 100755 index 80c14e9a2c7..6a86000e94b --- a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc @@ -86,7 +86,7 @@ bool YOLOv7End2EndORT::Initialize() { is_no_pad = false; is_scale_up = false; stride = 32; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -224,18 +224,18 @@ bool YOLOv7End2EndORT::Predict(cv::Mat* im, DetectionResult* result, im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold)) { + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold)) { FDERROR << "Failed to post process." 
<< std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc old mode 100644 new mode 100755 index 6fabd53812a..671d00ddb84 --- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc @@ -106,7 +106,7 @@ bool YOLOv7End2EndTRT::Initialize() { is_no_pad = false; is_scale_up = false; stride = 32; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -320,24 +320,24 @@ bool YOLOv7End2EndTRT::Predict(cv::Mat* im, DetectionResult* result, static_cast(mat.Width())}; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors, result, im_info, conf_threshold)) { + if (!Postprocess(reused_output_tensors_, result, im_info, conf_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolox.cc b/fastdeploy/vision/detection/contrib/yolox.cc old mode 100644 new mode 100755 index afac5d671ee..c1c07182633 --- a/fastdeploy/vision/detection/contrib/yolox.cc +++ b/fastdeploy/vision/detection/contrib/yolox.cc @@ -96,7 +96,7 @@ bool YOLOX::Initialize() { downsample_strides = {8, 16, 32}; max_wh = 4096.0f; is_decode_exported = false; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -290,25 +290,25 @@ bool YOLOX::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } if (is_decode_exported) { - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; } } else { - if (!PostprocessWithDecode(reused_output_tensors[0], result, im_info, + if (!PostprocessWithDecode(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." 
<< std::endl;
     return false;
diff --git a/fastdeploy/vision/detection/ppdet/ppyoloe.cc b/fastdeploy/vision/detection/ppdet/ppyoloe.cc
old mode 100644
new mode 100755
index 77400c739d7..00a82ace528
--- a/fastdeploy/vision/detection/ppdet/ppyoloe.cc
+++ b/fastdeploy/vision/detection/ppdet/ppyoloe.cc
@@ -55,7 +55,7 @@ bool PPYOLOE::Initialize() {
     FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
     return false;
   }
-  reused_input_tensors.resize(2);
+  reused_input_tensors_.resize(2);
   return true;
 }
@@ -252,7 +252,7 @@ bool PPYOLOE::Postprocess(std::vector<FDTensor>& infer_result,
 bool PPYOLOE::Predict(cv::Mat* im, DetectionResult* result) {
   Mat mat(*im);
-  if (!Preprocess(&mat, &reused_input_tensors)) {
+  if (!Preprocess(&mat, &reused_input_tensors_)) {
     FDERROR << "Failed to preprocess input data while using model:"
             << ModelName() << "." << std::endl;
     return false;
@@ -264,7 +264,7 @@ bool PPYOLOE::Predict(cv::Mat* im, DetectionResult* result) {
     return false;
   }
-  if (!Postprocess(reused_output_tensors, result)) {
+  if (!Postprocess(reused_output_tensors_, result)) {
     FDERROR << "Failed to postprocess while using model:" << ModelName()
             << "." << std::endl;
     return false;

From c5c741dd032f8dc2396f5e9796ed8bf3baf283cb Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Wed, 9 Nov 2022 06:28:41 +0000
Subject: [PATCH 14/50] fixed docs

---
 .../download_prebuilt_libraries.md            |  5 +++-
 .../download_prebuilt_libraries.md            | 25 +++++++++++--------
 2 files changed, 18 insertions(+), 12 deletions(-)
 mode change 100644 => 100755 docs/en/build_and_install/download_prebuilt_libraries.md

diff --git a/docs/cn/build_and_install/download_prebuilt_libraries.md b/docs/cn/build_and_install/download_prebuilt_libraries.md
index 1f89a023f61..40a5878ec2a 100755
--- a/docs/cn/build_and_install/download_prebuilt_libraries.md
+++ b/docs/cn/build_and_install/download_prebuilt_libraries.md
@@ -1,8 +1,11 @@
-
 # 预编译库安装

 FastDeploy提供各平台预编译库，供开发者直接下载安装使用。当然FastDeploy编译也非常容易，开发者也可根据自身需求编译FastDeploy。

+本文分为两部分:
+- [1. GPU部署环境](#gpu部署环境)
+- [2. CPU部署环境](#cpu部署环境)
+
 ## GPU部署环境

 ### 环境要求
diff --git a/docs/en/build_and_install/download_prebuilt_libraries.md b/docs/en/build_and_install/download_prebuilt_libraries.md
old mode 100644
new mode 100755
index 39cc0156c52..b9cbed42b30
--- a/docs/en/build_and_install/download_prebuilt_libraries.md
+++ b/docs/en/build_and_install/download_prebuilt_libraries.md
@@ -1,8 +1,11 @@
-
 # How to Install Prebuilt Library

 FastDeploy provides pre-built libraries for developers to download and install directly. Meanwhile, FastDeploy also offers easy access to compile so that developers can compile FastDeploy according to their own needs.

+This article is divided into two parts:
+- [1. GPU Deployment Environment](#gpu-deployment-environment)
+- [2. CPU Deployment Environment](#cpu-deployment-environment)
+
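+Once an environment below is installed, a quick import check confirms the wheel is usable (an illustrative sanity check, not an official step):
+
+```
+python -c "import fastdeploy"
+```
+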
 ## GPU Deployment Environment

 ### Environment Requirement
@@ -16,10 +19,10 @@ FastDeploy supports Computer Vision, Text and NLP model deployment on CPU and Nv

 ### Python SDK

-Install the released version(the newest 0.4.0 for now)
+Install the released version (the newest 0.5.0 for now)

 ```
-pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html 
+pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
 ```

 Install the Develop version(Nightly build)
@@ -36,12 +39,12 @@ conda config --add channels conda-forge && conda install cudatoolkit=11.2 cudnn=

 ### C++ SDK

-Install the released version(Latest 0.4.0)
+Install the released version (Latest 0.5.0)

 | Platform    | File                                                                                                                    | Description                                                |
 |:----------- |:----------------------------------------------------------------------------------------------------------------------- |:--------------------------------------------------------- |
-| Linux x64   | [fastdeploy-linux-x64-gpu-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.4.0.tgz)   | Compiled from g++ 8.2, CUDA 11.2, cuDNN 8.2                |
-| Windows x64 | [fastdeploy-win-x64-gpu-0.4.0.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-0.4.0.zip)       | Compiled from Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2  |
+| Linux x64   | [fastdeploy-linux-x64-gpu-0.5.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.5.0.tgz)   | Compiled from g++ 8.2, CUDA 11.2, cuDNN 8.2                |
+| Windows x64 | [fastdeploy-win-x64-gpu-0.5.0.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-0.5.0.zip)       | Compiled from Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2  |

 Install the Develop version(Nightly build)
@@ -61,7 +64,7 @@ FastDeploy supports computer vision, text and NLP model deployment on CPU with P

 ### Python SDK

-Install the released version(Latest 0.4.0 for now)
+Install the released version (Latest 0.5.0 for now)

 ```
 pip install fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
 ```
@@ -75,15 +78,15 @@ pip install fastdeploy-python==0.0.0 -f https://www.paddlepaddle.org.cn/whl/fast

 ### C++ SDK

-Install the released version(Latest 0.4.0 for now)
+Install the released version (Latest 0.5.0 for now, Android is 0.4.0 pre-release)

 | Platform      | File                                                                                                                    | Description                    |
 |:------------- |:----------------------------------------------------------------------------------------------------------------------- |:------------------------------ |
-| Linux x64     | [fastdeploy-linux-x64-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-0.4.0.tgz)           | Compiled from g++ 8.2          |
-| Windows x64   | [fastdeploy-win-x64-0.4.0.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-0.4.0.zip)               | Compiled from Visual Studio 16 |
+| Linux x64     | [fastdeploy-linux-x64-0.5.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-0.5.0.tgz)           | Compiled from g++ 8.2          |
+| Windows x64   | [fastdeploy-win-x64-0.5.0.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-0.5.0.zip)               | Compiled from Visual Studio 16 |
 | Mac OSX x64   | [fastdeploy-osx-x86_64-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-0.4.0.tgz)         | -                              |
 | Mac OSX arm64 | [fastdeploy-osx-arm64-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-0.4.0.tgz)           | -                              |
-| Linux aarch64 | [fastdeploy-linux-aarch64-0.2.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-0.2.0.tgz) | Compiled from g++ 6.3.0 | +| Linux aarch64 | [fastdeploy-linux-aarch64-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-0.4.0.tgz) | Compiled from g++ 6.3.0 | | Android armv7&v8 | [fastdeploy-android-0.4.0-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-0.4.0-shared.tgz) | Compiled from NDK 25 and clang++, support arm64-v8a and armeabi-v7a | Install the Develop version(Nightly build) From d4ebde0665ba8bf301fa2bdbdc79639cc4c1c4e2 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 9 Nov 2022 08:38:37 +0000 Subject: [PATCH 15/50] fixed headpose typo --- examples/vision/headpose/README.md | 2 +- examples/vision/headpose/fsanet/cpp/README.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) mode change 100644 => 100755 examples/vision/headpose/README.md mode change 100644 => 100755 examples/vision/headpose/fsanet/cpp/README.md diff --git a/examples/vision/headpose/README.md b/examples/vision/headpose/README.md old mode 100644 new mode 100755 index d4be67871c3..b727752e0a5 --- a/examples/vision/headpose/README.md +++ b/examples/vision/headpose/README.md @@ -1,6 +1,6 @@ # 头部姿态模型 -FastDeploy目前支持如下人脸对齐模型部署 +FastDeploy目前支持如下头部姿态模型部署 | 模型 | 说明 | 模型格式 | 版本 | | :--- | :--- | :------- | :--- | diff --git a/examples/vision/headpose/fsanet/cpp/README.md b/examples/vision/headpose/fsanet/cpp/README.md old mode 100644 new mode 100755 index 9fc719192be..1a3a5176992 --- a/examples/vision/headpose/fsanet/cpp/README.md +++ b/examples/vision/headpose/fsanet/cpp/README.md @@ -68,6 +68,7 @@ FSANet模型加载和初始化,其中model_file为导出的ONNX模型格式。 ### 类成员变量 用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 > > * **size**(vector<int>): 通过此参数修改预处理过程中resize的大小,包含两个整型元素,表示[width, height], 默认值为[112, 112] + - [模型介绍](../../) - [Python部署](../python) - [视觉模型预测结果](../../../../../docs/api/vision_results/) From 6f653daef4f98ec450cde3230f244f5ecdcf6473 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 9 Nov 2022 08:41:49 +0000 Subject: [PATCH 16/50] fixed typo --- examples/text/ernie-3.0/python/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 examples/text/ernie-3.0/python/requirements.txt diff --git a/examples/text/ernie-3.0/python/requirements.txt b/examples/text/ernie-3.0/python/requirements.txt old mode 100644 new mode 100755 index 204cf718cdf..29711008e23 --- a/examples/text/ernie-3.0/python/requirements.txt +++ b/examples/text/ernie-3.0/python/requirements.txt @@ -1,2 +1,2 @@ -faster_toeknizer +faster_tokenizer paddlenlp From 005897cb08960580786bc7f6d95c26b32b439959 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 9 Nov 2022 14:26:00 +0000 Subject: [PATCH 17/50] refactor yolov5 --- .../detection/contrib/yolov5/postprocessor.cc | 145 ++++++++++++ .../detection/contrib/yolov5/postprocessor.h | 80 +++++++ .../detection/contrib/yolov5/preprocessor.cc | 206 ++++++++++++++++++ .../detection/contrib/yolov5/preprocessor.h | 114 ++++++++++ .../vision/detection/contrib/yolov5/yolov5.cc | 99 +++++++++ .../vision/detection/contrib/yolov5/yolov5.h | 90 ++++++++ .../detection/contrib/yolov5/yolov5_pybind.cc | 73 +++++++ 7 files changed, 807 insertions(+) create mode 100755 fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc create mode 100755 fastdeploy/vision/detection/contrib/yolov5/postprocessor.h create mode 100755 fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc create mode 
100755 fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov5/yolov5.cc
 create mode 100755 fastdeploy/vision/detection/contrib/yolov5/yolov5.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc

diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
new file mode 100755
index 00000000000..9f892cd40e0
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
@@ -0,0 +1,145 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/detection/contrib/yolov5/postprocessor.h"
+#include "fastdeploy/vision/utils/utils.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace detection {
+
+YOLOv5Postprocessor::YOLOv5Postprocessor() {
+  conf_threshold_ = 0.25;
+  nms_threshold_ = 0.5;
+  multi_label_ = true;
+  initialized_ = true;
+  max_wh_ = 7680.0;
+}
+
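+// Layout note (inferred from the indexing in Postprocess below): the model
+// output is expected to be [batch, num_boxes, 5 + num_classes], where each
+// box row is [cx, cy, w, h, objectness, class scores...] and the final
+// confidence is objectness * class_score.
+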
+bool YOLOv5Postprocessor::Postprocess(
+    const std::vector<FDTensor>& infer_results,
+    std::vector<DetectionResult>* results,
+    const std::map<std::string, std::array<float, 2>>& im_info) {
+  auto& infer_result = infer_results[0];
+  for (size_t bs = 0; bs < results->size(); ++bs) {
+    (*results)[bs].Clear();
+    if (multi_label_) {
+      (*results)[bs].Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5));
+    } else {
+      (*results)[bs].Reserve(infer_result.shape[1]);
+    }
+    if (infer_result.dtype != FDDataType::FP32) {
+      FDERROR << "Only support post process with float32 data." << std::endl;
+      return false;
+    }
+    float* data = static_cast<float*>(infer_result.Data()) +
+                  bs * infer_result.shape[1] * infer_result.shape[2];
+    for (size_t i = 0; i < infer_result.shape[1]; ++i) {
+      int s = i * infer_result.shape[2];
+      float confidence = data[s + 4];
+      if (multi_label_) {
+        for (size_t j = 5; j < infer_result.shape[2]; ++j) {
+          confidence = data[s + 4];
+          float* class_score = data + s + j;
+          confidence *= (*class_score);
+          // filter boxes by conf_threshold
+          if (confidence <= conf_threshold_) {
+            continue;
+          }
+          int32_t label_id = std::distance(data + s + 5, class_score);
+
+          // convert from [x, y, w, h] to [x1, y1, x2, y2]
+          (*results)[bs].boxes.emplace_back(std::array<float, 4>{
+              data[s] - data[s + 2] / 2.0f + label_id * max_wh_,
+              data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_,
+              data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_,
+              data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_});
+          (*results)[bs].label_ids.push_back(label_id);
+          (*results)[bs].scores.push_back(confidence);
+        }
+      } else {
+        float* max_class_score =
+            std::max_element(data + s + 5, data + s + infer_result.shape[2]);
+        confidence *= (*max_class_score);
+        // filter boxes by conf_threshold
+        if (confidence <= conf_threshold_) {
+          continue;
+        }
+        int32_t label_id = std::distance(data + s + 5, max_class_score);
+        // convert from [x, y, w, h] to [x1, y1, x2, y2]
+        (*results)[bs].boxes.emplace_back(std::array<float, 4>{
+            data[s] - data[s + 2] / 2.0f + label_id * max_wh_,
+            data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_,
+            data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_,
+            data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_});
+        (*results)[bs].label_ids.push_back(label_id);
+        (*results)[bs].scores.push_back(confidence);
+      }
+    }
+
+    if ((*results)[bs].boxes.size() == 0) {
+      return true;
+    }
+
+    utils::NMS(&((*results)[bs]), nms_threshold_);
+
+    // scale the boxes to the origin image shape
+    auto iter_out = im_info.find("output_shape");
+    auto iter_ipt = im_info.find("input_shape");
+    FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
+             "Cannot find input_shape or output_shape from im_info.");
+    float out_h = iter_out->second[0];
+    float out_w = iter_out->second[1];
+    float ipt_h = iter_ipt->second[0];
+    float ipt_w = iter_ipt->second[1];
+    float scale = std::min(out_h / ipt_h, out_w / ipt_w);
+    for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) {
+      float pad_h = (out_h - ipt_h * scale) / 2;
+      float pad_w = (out_w - ipt_w * scale) / 2;
+      int32_t label_id = (*results)[bs].label_ids[i];
+      // clip box
+      (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh_ * label_id;
+      (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh_ * label_id;
+      (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh_ * label_id;
+      (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh_ * label_id;
+      (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f);
+      (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f);
+      (*results)[bs].boxes[i][2] = std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f);
+      (*results)[bs].boxes[i][3] = std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f);
+      (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w);
+      (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h);
+      (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w);
+      (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h);
+    }
+  }
+  return true;
+}
+
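+// The label_id * max_wh_ offset applied in Postprocess is a common single-pass
+// trick for class-aware NMS: boxes of different classes are shifted into
+// disjoint coordinate ranges so that one utils::NMS call never suppresses
+// across classes; the offset is subtracted again before rescaling the boxes.
+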
+bool YOLOv5Postprocessor::Run(const std::vector<FDTensor>& tensors,
+                              std::vector<DetectionResult>* results,
+                              std::map<std::string, std::array<float, 2>>* im_info) {
+  if (!initialized_) {
+    FDERROR << "Postprocessor is not initialized." << std::endl;
+    return false;
+  }
+
+  int batch = tensors[0].shape[0];
+
+  results->resize(batch);
+
+  if (!Postprocess(tensors, results, &im_info)) {
+    FDERROR << "Failed to postprocess the inference results." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+}  // namespace detection
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
new file mode 100755
index 00000000000..476f477f671
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
@@ -0,0 +1,80 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace detection {
+/*! @brief Postprocessor object for YOLOv5 series models.
+ */
+class FASTDEPLOY_DECL YOLOv5Postprocessor {
+ public:
+  /** \brief Create a postprocessor instance for YOLOv5 series models
+   */
+  YOLOv5Postprocessor();
+
+  /** \brief Process the result of runtime and fill to DetectionResult structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] result The output result of detection
+   * \param[in] im_info record input_shape and output_shape
+   * \return true if the postprocess succeeded, otherwise false
+   */
+  bool Run(const std::vector<FDTensor>& tensors,
+           std::vector<DetectionResult>* results,
+           std::map<std::string, std::array<float, 2>>* im_info);
+
+  /// Set conf_threshold, default 0.25
+  void SetConfThreshold(float conf_threshold) {
+    conf_threshold_ = conf_threshold;
+  }
+
+  /// Get conf_threshold, default 0.25
+  float GetConfThreshold() const { return conf_threshold_; }
+
+  /// Set nms_threshold, default 0.5
+  void SetNMSThreshold(float nms_threshold) {
+    nms_threshold_ = nms_threshold;
+  }
+
+  /// Get nms_threshold, default 0.5
+  float GetNMSThreshold() const { return nms_threshold_; }
+
+  /// Set multi_label, default true
+  void SetMultiLabel(bool multi_label) {
+    multi_label_ = multi_label;
+  }
+
+  /// Get multi_label, default true
+  bool GetMultiLabel() const { return multi_label_; }
+
+ private:
+  bool Postprocess(const std::vector<FDTensor>& tensors,
+                   std::vector<DetectionResult>* results,
+                   const std::map<std::string, std::array<float, 2>>& im_info);
+
+  bool initialized_ = false;
+  float conf_threshold_;
+  float nms_threshold_;
+  bool multi_label_;
+  float max_wh_;
+};
+
+}  // namespace detection
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
new file mode 100755
index 00000000000..506d0decdce
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
@@ -0,0 +1,206 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/detection/contrib/yolov5/preprocessor.h" +#include "fastdeploy/function/concat.h" +#ifdef ENABLE_CUDA_PREPROCESS +#include "fastdeploy/vision/utils/cuda_utils.h" +#endif // ENABLE_CUDA_PREPROCESS + +namespace fastdeploy { +namespace vision { +namespace detection { + +YOLOv5Preprocessor::YOLOv5Preprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = false; + stride_ = 32; + max_wh_ = 7680.0; + initialized_ = true; +} + +void YOLOv5Preprocessor::LetterBox(FDMat* mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + Resize::Run(mat, resize_w, resize_h); + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, + std::map>* im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // process after image load + double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), + static_cast(mat->Width())); + if (ratio != 1.0) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolov5's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + LetterBox(mat); + BGR2RGB::Run(mat); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + return true; +} + +void YOLOv5Preprocessor::UseCudaPreprocessing(int max_image_size) { +#ifdef ENABLE_CUDA_PREPROCESS + use_cuda_preprocessing_ = true; + is_scale_up_ = true; + if (input_img_cuda_buffer_host_ == nullptr) { + // prepare input data cache in GPU pinned memory + CUDA_CHECK(cudaMallocHost((void**)&input_img_cuda_buffer_host_, + max_image_size * 3)); + // prepare input data cache in GPU device memory + CUDA_CHECK( + cudaMalloc((void**)&input_img_cuda_buffer_device_, max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void**)&input_tensor_cuda_buffer_device_, + 3 * size_[0] * size_[1] * sizeof(float))); + } +#else + FDWARNING << "The FastDeploy didn't compile with BUILD_CUDA_SRC=ON." + << std::endl; + use_cuda_preprocessing_ = false; +#endif +} + +bool YOLOv5Preprocessor::CudaPreprocess(FDMat* mat, FDTensor* output, + std::map>* im_info) { +#ifdef ENABLE_CUDA_PREPROCESS + if (is_mini_pad_ != false || is_no_pad_ != false || is_scale_up_ != true) { + FDERROR << "Preprocessing with CUDA is only available when the arguments " + "satisfy (is_mini_pad_=false, is_no_pad_=false, is_scale_up_=true)." + << std::endl; + return false; + } + + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + cudaStream_t stream = reinterpret_cast(cuda_stream_); + int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); + memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); + CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, + input_img_cuda_buffer_host_, src_img_buf_size, + cudaMemcpyHostToDevice, stream)); + utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), + mat->Height(), input_tensor_cuda_buffer_device_, + size_[0], size_[1], padding_value_, stream); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(size_[0]), + static_cast(size_[1])}; + + output->SetExternalData({mat->Channels(), size_[0], size_[1]}, FDDataType::FP32, + input_tensor_cuda_buffer_device_); + output->device = Device::GPU; + output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + return true; +#else + FDERROR << "CUDA src code was not enabled." << std::endl; + return false; +#endif // ENABLE_CUDA_PREPROCESS +} + +bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* outputs, + std::map>* im_info) { + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." << std::endl; + return false; + } + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (use_cuda_preprocessing_) { + if (!CudaPreprocess(&(*images)[i], &tensors[i], im_info)) { + FDERROR << "Failed to preprocess input image." 
<< std::endl;
+        return false;
+      }
+    } else {
+      if (!Preprocess(&(*images)[i], &tensors[i], im_info)) {
+        FDERROR << "Failed to preprocess input image." << std::endl;
+        return false;
+      }
+    }
+  }
+
+  if (tensors.size() == 1) {
+    (*outputs)[0] = std::move(tensors[0]);
+  } else {
+    function::Concat(tensors, &((*outputs)[0]), 0);
+  }
+  return true;
+}
+
+}  // namespace detection
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
new file mode 100755
index 00000000000..a69002966b0
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
@@ -0,0 +1,114 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace detection {
+/*! @brief Preprocessor object for YOLOv5 series models.
+ */
+class FASTDEPLOY_DECL YOLOv5Preprocessor {
+ public:
+  /** \brief Create a preprocessor instance for YOLOv5 series models
+   */
+  YOLOv5Preprocessor();
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] images The input image data list, all the elements are returned by cv::imread()
+   * \param[in] outputs The output tensors which will feed in runtime
+   * \param[in] im_info record input_shape and output_shape
+   * \return true if the preprocess succeeded, otherwise false
+   */
+  bool Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+           std::map<std::string, std::array<float, 2>>* im_info);
+
+  /// Set target size, tuple of (width, height), default size = {640, 640}
+  void SetSize(std::vector<int> size) { size_ = size; }
+
+  /// Get target size, tuple of (width, height), default size = {640, 640}
+  std::vector<int> GetSize() const { return size_; }
+
+  /// Set padding value, size should be the same as channels
+  void SetPaddingValue(std::vector<float> padding_value) {
+    padding_value_ = padding_value;
+  }
+
+  /// Get padding value, size should be the same as channels
+  std::vector<float> GetPaddingValue() const { return padding_value_; }
+
+ private:
+  bool Preprocess(FDMat* mat, FDTensor* output,
+                  std::map<std::string, std::array<float, 2>>* im_info);
+
+  void UseCudaPreprocessing(int max_img_size = 3840 * 2160);
+
+  bool CudaPreprocess(FDMat* mat, FDTensor* output,
+                      std::map<std::string, std::array<float, 2>>* im_info);
+
+  bool IsDynamicInput() const { return is_dynamic_input_; }
+
+  void LetterBox(FDMat* mat);
+
+  bool initialized_ = false;
+  // target size, tuple of (width, height), default size = {640, 640}
+  std::vector<int> size_;
+
+  // padding value, size should be the same as channels
+  std::vector<float> padding_value_;
+
+  // only pad to the minimum rectangle whose height and width are multiples of stride
+  bool is_mini_pad_;
+
+  // while is_mini_pad = false and is_no_pad = true,
+  // will resize the image to the set size
+  bool is_no_pad_;
+
+  // if is_scale_up is false, the input image can only be scaled down; the maximum
resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // YOLOv5 official 'export_onnx.py' script will export dynamic ONNX by + // default. + // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by fastdeploy after the internal Runtime already initialized. + bool is_dynamic_input_; + // CUDA host buffer for input image + uint8_t* input_img_cuda_buffer_host_ = nullptr; + // CUDA device buffer for input image + uint8_t* input_img_cuda_buffer_device_ = nullptr; + // CUDA device buffer for TRT input tensor + float* input_tensor_cuda_buffer_device_ = nullptr; + // Whether to use CUDA preprocessing + bool use_cuda_preprocessing_ = false; + // CUDA stream + void* cuda_stream_ = nullptr; +}; + +} // namespace detection +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc new file mode 100755 index 00000000000..2e311cb111c --- /dev/null +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/detection/contrib/yolov5.h" + +namespace fastdeploy { +namespace vision { +namespace detection { + +YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file, + const RuntimeOption& custom_option, + const ModelFormat& model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +#ifdef ENABLE_CUDA_PREPROCESS + cudaSetDevice(runtime_option.device_id); + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + cuda_stream_ = reinterpret_cast(stream); + runtime_option.SetExternalStream(cuda_stream_); +#endif // ENABLE_CUDA_PREPROCESS + initialized = Initialize(); +} + +bool YOLOv5::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize fastdeploy backend." 
<< std::endl; + return false; + } + return true; +} + +YOLOv5::~YOLOv5() { +#ifdef ENABLE_CUDA_PREPROCESS + if (use_cuda_preprocessing_) { + CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); + CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); + CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); + CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); + } +#endif // ENABLE_CUDA_PREPROCESS +} + +bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, float nms_threshold) { + postprocessor_.SetConfThreshold(conf_threshold); + postprocessor_.SetNMSThreshold(nms_threshold); + +} + +YOLOv5::Predict(const cv::Mat* im, DetectionResult* result) { + + } + +YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { + std::map> im_info; + + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &im_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, &im_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h new file mode 100755 index 00000000000..78621398881 --- /dev/null +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h @@ -0,0 +1,90 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/vision/detection/contrib/yolov5/preprocessor.h" +#include "fastdeploy/vision/detection/contrib/yolov5/postprocessor.h" + +namespace fastdeploy { +namespace vision { +namespace detection { +/*! @brief YOLOv5 model object used when to load a YOLOv5 model exported by YOLOv5. + */ +class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { + public: + /** \brief Set path of model file and the configuration of runtime. 
+   *
+   * \param[in] model_file Path of model file, e.g ./yolov5.onnx
+   * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
+  YOLOv5(const std::string& model_file, const std::string& params_file = "",
+         const RuntimeOption& custom_option = RuntimeOption(),
+         const ModelFormat& model_format = ModelFormat::ONNX);
+
+  ~YOLOv5();
+
+  std::string ModelName() const { return "yolov5"; }
+
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_threshold iou threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(cv::Mat* im, DetectionResult* result,
+                       float conf_threshold = 0.25,
+                       float nms_threshold = 0.5);
+
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(const cv::Mat& img, DetectionResult* result);
+
+  /** \brief Predict the detection results for a batch of input images
+   *
+   * \param[in] imgs The input image list, each element comes from cv::imread()
+   * \param[in] results The output detection result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat>& imgs,
+                            std::vector<DetectionResult>* results);
+
+  /// Get preprocessor reference of YOLOv5
+  virtual YOLOv5Preprocessor& GetPreprocessor() {
+    return preprocessor_;
+  }
+
+  /// Get postprocessor reference of YOLOv5
+  virtual YOLOv5Postprocessor& GetPostprocessor() {
+    return postprocessor_;
+  }
+
+ protected:
+  bool Initialize();
+  YOLOv5Preprocessor preprocessor_;
+  YOLOv5Postprocessor postprocessor_;
+};
+
+}  // namespace detection
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
new file mode 100755
index 00000000000..19e59d1c670
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
@@ -0,0 +1,73 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
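+
+// Python bindings for the YOLOv5 detector. From the Python side the class is
+// exposed as fastdeploy.vision.detection.YOLOv5; an illustrative call (the
+// model path is a placeholder):
+//
+//   model = fd.vision.detection.YOLOv5("yolov5s.onnx")
+//   result = model.predict(image, conf_threshold=0.25, nms_iou_threshold=0.5)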
+ +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { +void BindYOLOv5(pybind11::module& m) { + pybind11::class_(m, "YOLOv5") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv5& self, pybind11::array& data, + float conf_threshold, float nms_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_threshold); + return res; + }) + .def("use_cuda_preprocessing", + [](vision::detection::YOLOv5& self, int max_image_size) { + self.UseCudaPreprocessing(max_image_size); + }) + .def_static("preprocess", + [](pybind11::array& data, const std::vector& size, + const std::vector padding_value, bool is_mini_pad, + bool is_no_pad, bool is_scale_up, int stride, float max_wh, + bool multi_label) { + auto mat = PyArrayToCvMat(data); + fastdeploy::vision::Mat fd_mat(mat); + FDTensor output; + std::map> im_info; + vision::detection::YOLOv5::Preprocess( + &fd_mat, &output, &im_info, size, padding_value, + is_mini_pad, is_no_pad, is_scale_up, stride, max_wh, + multi_label); + return make_pair(TensorToPyArray(output), im_info); + }) + .def_static( + "postprocess", + [](std::vector infer_results, + const std::map>& im_info, + float conf_threshold, float nms_threshold, bool multi_label, + float max_wh) { + std::vector fd_infer_results(infer_results.size()); + PyArrayToTensorList(infer_results, &fd_infer_results, true); + vision::DetectionResult result; + vision::detection::YOLOv5::Postprocess( + fd_infer_results, &result, im_info, conf_threshold, + nms_threshold, multi_label, max_wh); + return result; + }) + .def_readwrite("size", &vision::detection::YOLOv5::size_) + .def_readwrite("padding_value", + &vision::detection::YOLOv5::padding_value_) + .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad_) + .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad_) + .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up_) + .def_readwrite("stride", &vision::detection::YOLOv5::stride_) + .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh_) + .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label_); +} +} // namespace fastdeploy From 0509089941ae5a6fc2f9782d8023007900de755a Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 9 Nov 2022 14:31:37 +0000 Subject: [PATCH 18/50] update model infer --- .../vision/detection/contrib/yolov5/yolov5.cc | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index 2e311cb111c..e10ceb9ed2c 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -64,17 +64,25 @@ YOLOv5::~YOLOv5() { bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, float nms_threshold) { postprocessor_.SetConfThreshold(conf_threshold); postprocessor_.SetNMSThreshold(nms_threshold); - + if (!Predict(*im, result)) { + return false; + } + return true; } -YOLOv5::Predict(const cv::Mat* im, DetectionResult* result) { - - } +bool YOLOv5::Predict(const cv::Mat& im, DetectionResult* result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} -YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { +bool YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { std::map> im_info; - std::vector fd_images = 
WrapMat(images);
+
   if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &im_info)) {
     FDERROR << "Failed to preprocess the input image." << std::endl;
     return false;

From febb2c14f7b9acc10373bfb5d2e34712f5b1ec19 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Thu, 10 Nov 2022 08:49:31 +0000
Subject: [PATCH 19/50] refactor pybind for yolov5

---
 .../detection/contrib/yolov5/postprocessor.cc |   4 +-
 .../detection/contrib/yolov5/postprocessor.h  |   2 +-
 .../detection/contrib/yolov5/preprocessor.cc  |   4 +-
 .../detection/contrib/yolov5/preprocessor.h   |   5 +-
 .../detection/contrib/yolov5/yolov5_pybind.cc | 105 ++++----
 .../vision/detection/contrib/yolov5.py        | 246 +++++++++---------
 6 files changed, 195 insertions(+), 171 deletions(-)

diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
index 9f892cd40e0..efc5157c117 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
@@ -125,7 +125,7 @@ bool YOLOv5Postprocessor::Postprocess(
 }

 bool YOLOv5Postprocessor::Run(const std::vector<FDTensor>& tensors,
                               std::vector<DetectionResult>* results,
-                              std::map<std::string, std::array<float, 2>>* im_info) {
+                              const std::map<std::string, std::array<float, 2>>& im_info) {
   if (!initialized_) {
     FDERROR << "Postprocessor is not initialized." << std::endl;
     return false;
@@ -135,7 +135,7 @@ bool YOLOv5Postprocessor::Run(const std::vector<FDTensor>& tensors,

   results->resize(batch);

-  if (!Postprocess(tensors, results, &im_info)) {
+  if (!Postprocess(tensors, results, im_info)) {
     FDERROR << "Failed to postprocess the inference results." << std::endl;
     return false;
   }
diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
index 476f477f671..c40624820b6 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
+++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
@@ -37,7 +37,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor {
    */
   bool Run(const std::vector<FDTensor>& tensors,
            std::vector<DetectionResult>* results,
-           std::map<std::string, std::array<float, 2>>* im_info);
+           const std::map<std::string, std::array<float, 2>>& im_info);

   /// Set conf_threshold, default 0.25
   void SetConfThreshold(float conf_threshold) {
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
index 506d0decdce..61de32d2770 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
@@ -102,7 +102,7 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, h, w, c
   return true;
 }

@@ -158,7 +158,7 @@ bool YOLOv5Preprocessor::CudaPreprocess(FDMat* mat, FDTensor* output,
   output->SetExternalData({mat->Channels(), size_[0], size_[1]}, FDDataType::FP32,
                           input_tensor_cuda_buffer_device_);
   output->device = Device::GPU;
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, h, w, c
   return true;
 #else
   FDERROR << "CUDA src code was not enabled." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
index a69002966b0..b57af35909f 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
@@ -52,12 +52,13 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor {
   /// Get padding value, size should be the same as channels
   std::vector<float> GetPaddingValue() const { return padding_value_; }

+  /// Use CUDA preprocessing
+  void UseCudaPreprocessing(int max_img_size = 3840 * 2160);
+
  private:
   bool Preprocess(FDMat* mat, FDTensor* output,
                   std::map<std::string, std::array<float, 2>>* im_info);

-  void UseCudaPreprocessing(int max_img_size = 3840 * 2160);
-
   bool CudaPreprocess(FDMat* mat, FDTensor* output,
                       std::map<std::string, std::array<float, 2>>* im_info);
diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
index 19e59d1c670..21bfd830fd4 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
@@ -16,58 +16,73 @@
 namespace fastdeploy {
 void BindYOLOv5(pybind11::module& m) {
+  pybind11::class_<vision::detection::YOLOv5Preprocessor>(
+      m, "YOLOv5Preprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::detection::YOLOv5Preprocessor& self,
+                     std::vector<pybind11::array>& im_list) {
+        std::vector<vision::FDMat> images;
+        for (size_t i = 0; i < im_list.size(); ++i) {
+          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+        }
+        std::vector<FDTensor> outputs;
+        std::map<std::string, std::array<float, 2>> im_info;
+        if (!self.Run(&images, &outputs, &im_info)) {
+          pybind11::eval("raise Exception('Failed to preprocess the input data in YOLOv5Preprocessor.')");
+        }
+        return make_pair(outputs, im_info);
+      })
+      .def("use_cuda_preprocessing",
+           [](vision::detection::YOLOv5Preprocessor& self, int max_image_size) {
+             self.UseCudaPreprocessing(max_image_size);
+           })
+      .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize,
+                    &vision::detection::YOLOv5Preprocessor::SetSize)
+      .def_property("padding_value",
+                    &vision::detection::YOLOv5Preprocessor::GetPaddingValue,
+                    &vision::detection::YOLOv5Preprocessor::SetPaddingValue);
+
+  pybind11::class_<vision::detection::YOLOv5Postprocessor>(
+      m, "YOLOv5Postprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::detection::YOLOv5Postprocessor& self,
+                     std::vector<FDTensor>& inputs,
+                     const std::map<std::string, std::array<float, 2>>& im_info) {
+        std::vector<vision::DetectionResult> results;
+        if (!self.Run(inputs, &results, im_info)) {
+          pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')");
+        }
+        return results;
+      })
+      .def("run", [](vision::detection::YOLOv5Postprocessor& self,
+                     std::vector<pybind11::array>& input_array,
+                     const std::map<std::string, std::array<float, 2>>& im_info) {
+        std::vector<vision::DetectionResult> results;
+        std::vector<FDTensor> inputs;
+        PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
+        if (!self.Run(inputs, &results, im_info)) {
+          pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')");
+        }
+        return results;
+      })
+      .def_property("conf_threshold",
+                    &vision::detection::YOLOv5Postprocessor::GetConfThreshold,
+                    &vision::detection::YOLOv5Postprocessor::SetConfThreshold)
+      .def_property("nms_threshold",
+                    &vision::detection::YOLOv5Postprocessor::GetNMSThreshold,
+                    &vision::detection::YOLOv5Postprocessor::SetNMSThreshold)
+      .def_property("multi_label",
+                    &vision::detection::YOLOv5Postprocessor::GetMultiLabel,
+                    &vision::detection::YOLOv5Postprocessor::SetMultiLabel);
+
   pybind11::class_<vision::detection::YOLOv5, FastDeployModel>(m, "YOLOv5")
       .def(pybind11::init<std::string, std::string, RuntimeOption, ModelFormat>())
       .def("predict",
-           [](vision::detection::YOLOv5& self, pybind11::array& data,
-              float conf_threshold, float nms_threshold) {
   pybind11::class_<vision::detection::YOLOv5, FastDeployModel>(m, "YOLOv5")
       .def(pybind11::init<std::string, std::string, RuntimeOption,
                           ModelFormat>())
       .def("predict",
-           [](vision::detection::YOLOv5& self, pybind11::array& data,
-              float conf_threshold, float nms_threshold) {
+           [](vision::detection::YOLOv5& self, pybind11::array& data) {
             auto mat = PyArrayToCvMat(data);
             vision::DetectionResult res;
-            self.Predict(&mat, &res, conf_threshold, nms_threshold);
+            self.Predict(mat, &res);
             return res;
           })
-      .def("use_cuda_preprocessing",
-           [](vision::detection::YOLOv5& self, int max_image_size) {
-             self.UseCudaPreprocessing(max_image_size);
-           })
-      .def_static("preprocess",
-                  [](pybind11::array& data, const std::vector<int>& size,
-                     const std::vector<float> padding_value, bool is_mini_pad,
-                     bool is_no_pad, bool is_scale_up, int stride, float max_wh,
-                     bool multi_label) {
-                    auto mat = PyArrayToCvMat(data);
-                    fastdeploy::vision::Mat fd_mat(mat);
-                    FDTensor output;
-                    std::map<std::string, std::array<float, 2>> im_info;
-                    vision::detection::YOLOv5::Preprocess(
-                        &fd_mat, &output, &im_info, size, padding_value,
-                        is_mini_pad, is_no_pad, is_scale_up, stride, max_wh,
-                        multi_label);
-                    return make_pair(TensorToPyArray(output), im_info);
-                  })
-      .def_static(
-          "postprocess",
-          [](std::vector<pybind11::array> infer_results,
-             const std::map<std::string, std::array<float, 2>>& im_info,
-             float conf_threshold, float nms_iou_threshold, bool multi_label,
-             float max_wh) {
-            std::vector<FDTensor> fd_infer_results(infer_results.size());
-            PyArrayToTensorList(infer_results, &fd_infer_results, true);
-            vision::DetectionResult result;
-            vision::detection::YOLOv5::Postprocess(
-                fd_infer_results, &result, im_info, conf_threshold,
-                nms_iou_threshold, multi_label, max_wh);
-            return result;
-          })
-      .def_readwrite("size", &vision::detection::YOLOv5::size_)
-      .def_readwrite("padding_value",
-                     &vision::detection::YOLOv5::padding_value_)
-      .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad_)
-      .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad_)
-      .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up_)
-      .def_readwrite("stride", &vision::detection::YOLOv5::stride_)
-      .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh_)
-      .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label_);
+      .def("batch_predict", [](vision::detection::YOLOv5& self,
+                               std::vector<pybind11::array>& data) {
+        std::vector<cv::Mat> images;
+        for (size_t i = 0; i < data.size(); ++i) {
+          images.push_back(PyArrayToCvMat(data[i]));
+        }
+        std::vector<vision::DetectionResult> results;
+        self.BatchPredict(images, &results);
+        return results;
+      })
+      .def_property_readonly("preprocessor",
+                             &vision::detection::YOLOv5::GetPreprocessor)
+      .def_property_readonly("postprocessor",
+                             &vision::detection::YOLOv5::GetPostprocessor);
 }
 } // namespace fastdeploy
diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py
index 5ecef307bc8..e0b5138becf 100644
--- a/python/fastdeploy/vision/detection/contrib/yolov5.py
+++ b/python/fastdeploy/vision/detection/contrib/yolov5.py
@@ -18,109 +18,38 @@
 from .... import c_lib_wrap as C
 
-class YOLOv5(FastDeployModel):
-    def __init__(self,
-                 model_file,
-                 params_file="",
-                 runtime_option=None,
-                 model_format=ModelFormat.ONNX):
-        """Load a YOLOv5 model exported by YOLOv5.
- - :param model_file: (str)Path of model file, e.g ./yolov5.onnx - :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string - :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU - :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model +class YOLOv5Preprocessor: + def __init__(self): + """Create a preprocessor for YOLOv5 """ - # 调用基函数进行backend_option的初始化 - # 初始化后的option保存在self._runtime_option - super(YOLOv5, self).__init__(runtime_option) + self._preprocessor = C.vision.detection.YOLOv5Preprocessor() - self._model = C.vision.detection.YOLOv5( - model_file, params_file, self._runtime_option, model_format) - # 通过self.initialized判断整个模型的初始化是否成功 - assert self.initialized, "YOLOv5 initialize failed." + def run(self, input_ims): + """Preprocess input images for YOLOv5 - def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): - """Detect an input image + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) - :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format - :param conf_threshold: confidence threashold for postprocessing, default is 0.25 - :param nms_iou_threshold: iou threashold for NMS, default is 0.5 - :return: DetectionResult + def use_cuda_preprocessing(self, max_image_size): + """Preprocess input images by CUDA + + :param: max_image_size: (int)Set max_image_size """ - return self._model.predict(input_image, conf_threshold, - nms_iou_threshold) - - @staticmethod - def preprocess(input_image, - size=[640, 640], - padding_value=[114.0, 114.0, 114.0], - is_mini_pad=False, - is_no_pad=False, - is_scale_up=False, - stride=32, - max_wh=7680.0, - multi_label=True): - return C.vision.detection.YOLOv5.preprocess( - input_image, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride, max_wh, multi_label) - - @staticmethod - def postprocess(infer_result, - im_info, - conf_threshold=0.25, - nms_iou_threshold=0.5, - multi_label=True, - max_wh=7680.0): - return C.vision.detection.YOLOv5.postprocess( - infer_result, im_info, conf_threshold, nms_iou_threshold, - multi_label, max_wh) - - # 一些跟YOLOv5模型有关的属性封装 - # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + return self._preprocessor.use_cuda_preprocessing(max_image_size) + @property def size(self): """ Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] """ - return self._model.size + return self._preprocessor.size @property def padding_value(self): # padding value, size should be the same as channels - return self._model.padding_value - - @property - def is_no_pad(self): - # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size - return self._model.is_no_pad - - @property - def is_mini_pad(self): - # only pad to the minimum rectange which height and width is times of stride - return self._model.is_mini_pad - - @property - def is_scale_up(self): - # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 - return self._model.is_scale_up - - @property - def stride(self): - # padding stride, for is_mini_pad - return self._model.stride - - @property - def max_wh(self): - # for offseting the boxes by classes when using 
NMS - return self._model.max_wh - - @property - def multi_label(self): - """ - Argument for image preprocessing step, for different strategies to get boxes when postprocessing, default True - """ - return self._model.multi_label + return self._preprocessor.padding_value @size.setter def size(self, wh): @@ -129,50 +58,129 @@ def size(self, wh): assert len(wh) == 2,\ "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( len(wh)) - self._model.size = wh + self._preprocessor.size = wh @padding_value.setter def padding_value(self, value): assert isinstance( value, list), "The value to set `padding_value` must be type of list." - self._model.padding_value = value + self._preprocessor.padding_value = value - @is_no_pad.setter - def is_no_pad(self, value): - assert isinstance( - value, bool), "The value to set `is_no_pad` must be type of bool." - self._model.is_no_pad = value - @is_mini_pad.setter - def is_mini_pad(self, value): - assert isinstance( - value, - bool), "The value to set `is_mini_pad` must be type of bool." - self._model.is_mini_pad = value +class YOLOv5Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv5 + """ + self._postprocessor = C.vision.detection.YOLOv5Postprocessor() - @is_scale_up.setter - def is_scale_up(self, value): - assert isinstance( - value, - bool), "The value to set `is_scale_up` must be type of bool." - self._model.is_scale_up = value + def run(self, runtime_results, im_info): + """Postprocess the runtime results for YOLOv5 - @stride.setter - def stride(self, value): - assert isinstance( - value, int), "The value to set `stride` must be type of int." - self._model.stride = value + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: im_info: (dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, im_info) - @max_wh.setter - def max_wh(self, value): - assert isinstance( - value, float), "The value to set `max_wh` must be type of float." - self._model.max_wh = value + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def multi_label(self): + """ + multi_label for postprocessing, default is true + """ + return self._postprocessor.multi_label + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance(conf_threshold, float),\ + "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance(nms_threshold, float),\ + "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold @multi_label.setter def multi_label(self, value): assert isinstance( value, bool), "The value to set `multi_label` must be type of bool." - self._model.multi_label = value + self._postprocessor.multi_label = value + + +class YOLOv5(FastDeployModel): + def __init__(self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX): + """Load a YOLOv5 model exported by YOLOv5. 
+
+        :param model_file: (str)Path of the model file, e.g. ./yolov5.onnx
+        :param params_file: (str)Path of the parameters file, e.g. yolov5/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inferencing this model; if it's None, the default backend on CPU will be used
+        :param model_format: (fastdeploy.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend option;
+        # the initialized option is stored in self._runtime_option
+        super(YOLOv5, self).__init__(runtime_option)
+
+        assert model_format == ModelFormat.ONNX, "YOLOv5 only supports model format of ModelFormat.ONNX now."
+        self._model = C.vision.detection.YOLOv5(
+            model_file, params_file, self._runtime_option, model_format)
+        # self.initialized indicates whether the whole model initialized successfully
+        assert self.initialized, "YOLOv5 initialize failed."
+
+    def predict(self, input_image, conf_threshold=0.25, nms_threshold=0.5):
+        """Detect an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :param conf_threshold: confidence threshold for postprocessing, default is 0.25
+        :param nms_threshold: iou threshold for NMS, default is 0.5
+        :return: DetectionResult
+        """
+
+        self.postprocessor.conf_threshold = conf_threshold
+        self.postprocessor.nms_threshold = nms_threshold
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return list of DetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get the YOLOv5Preprocessor object of the loaded model
+
+        :return YOLOv5Preprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get the YOLOv5Postprocessor object of the loaded model
+
+        :return YOLOv5Postprocessor
+        """
+        return self._model.postprocessor

From 92dc3522372e3065b4aa551a27622aabc14cd4dd Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Thu, 10 Nov 2022 09:14:32 +0000
Subject: [PATCH 20/50] rm origin yolov5

---
 fastdeploy/vision/detection/contrib/yolov5.cc | 384 ------------------
 fastdeploy/vision/detection/contrib/yolov5.h  | 136 -------
 .../detection/contrib/yolov5/postprocessor.cc |   1 -
 .../detection/contrib/yolov5/postprocessor.h  |   7 +-
 .../detection/contrib/yolov5/preprocessor.cc  |   3 -
 .../detection/contrib/yolov5/preprocessor.h   |   7 +-
 .../vision/detection/contrib/yolov5_pybind.cc |  73 ----
 7 files changed, 9 insertions(+), 602 deletions(-)
 delete mode 100755 fastdeploy/vision/detection/contrib/yolov5.cc
 delete mode 100644 fastdeploy/vision/detection/contrib/yolov5.h
 delete mode 100644 fastdeploy/vision/detection/contrib/yolov5_pybind.cc
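With the monolithic class removed below, thresholds and preprocessing options hang off the member processors instead of being passed to every call. A minimal C++ sketch of the replacement flow (assuming `GetPostprocessor()` returns a mutable reference, as the `def_property_readonly` binding suggests; paths are placeholders):

```cpp
// Sketch: the decomposed API that replaces the deleted monolithic class.
// BatchPredict reads thresholds from the postprocessor, so they are set
// up front instead of being passed per call.
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::vision::detection::YOLOv5 model("yolov5s.onnx");
  if (!model.Initialized()) return -1;

  auto& post = model.GetPostprocessor();  // assumed to return a reference
  post.SetConfThreshold(0.3f);
  post.SetNMSThreshold(0.45f);

  std::vector<cv::Mat> images = {cv::imread("test.jpg")};
  std::vector<fastdeploy::vision::DetectionResult> results;
  if (!model.BatchPredict(images, &results)) return -1;
  return 0;
}
```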
diff --git a/fastdeploy/vision/detection/contrib/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5.cc
deleted file mode 100755
index ba5b22363ec..00000000000
--- a/fastdeploy/vision/detection/contrib/yolov5.cc
+++ /dev/null
@@ -1,384 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/yolov5.h"
-
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-#ifdef ENABLE_CUDA_PREPROCESS
-#include "fastdeploy/vision/utils/cuda_utils.h"
-#endif  // ENABLE_CUDA_PREPROCESS
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-void YOLOv5::LetterBox(Mat* mat, std::vector<int> size,
-                       std::vector<float> color, bool _auto, bool scale_fill,
-                       bool scale_up, int stride) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  Resize::Run(mat, resize_w, resize_h);
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file,
-               const RuntimeOption& custom_option,
-               const ModelFormat& model_format) {
-  if (model_format == ModelFormat::ONNX) {
-    valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-#ifdef ENABLE_CUDA_PREPROCESS
-  cudaSetDevice(runtime_option.device_id);
-  cudaStream_t stream;
-  CUDA_CHECK(cudaStreamCreate(&stream));
-  cuda_stream_ = reinterpret_cast<void*>(stream);
-  runtime_option.SetExternalStream(cuda_stream_);
-#endif  // ENABLE_CUDA_PREPROCESS
-  initialized = Initialize();
-}
-
-bool YOLOv5::Initialize() {
-  // parameters for preprocess
-  size_ = {640, 640};
-  padding_value_ = {114.0, 114.0, 114.0};
-  is_mini_pad_ = false;
-  is_no_pad_ = false;
-  is_scale_up_ = false;
-  stride_ = 32;
-  max_wh_ = 7680.0;
-  multi_label_ = true;
-  reused_input_tensors_.resize(1);
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
- // TODO(qiuyanjun): remove - // is_dynamic_input_ = false; - // auto shape = InputInfoOfRuntime(0).shape; - // for (int i = 0; i < shape.size(); ++i) { - // // if height or width is dynamic - // if (i >= 2 && shape[i] <= 0) { - // is_dynamic_input_ = true; - // break; - // } - // } - // if (!is_dynamic_input_) { - // is_mini_pad_ = false; - // } - - return true; -} - -YOLOv5::~YOLOv5() { -#ifdef ENABLE_CUDA_PREPROCESS - if (use_cuda_preprocessing_) { - CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); - CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); - CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); - CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); - } -#endif // ENABLE_CUDA_PREPROCESS -} - -bool YOLOv5::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info, - const std::vector& size, - const std::vector padding_value, - bool is_mini_pad, bool is_no_pad, bool is_scale_up, - int stride, float max_wh, bool multi_label) { - // Record the shape of image and the shape of preprocessed image - (*im_info)["input_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - // process after image load - double ratio = (size[0] * 1.0) / std::max(static_cast(mat->Height()), - static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov5's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, - stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -void YOLOv5::UseCudaPreprocessing(int max_image_size) { -#ifdef ENABLE_CUDA_PREPROCESS - use_cuda_preprocessing_ = true; - is_scale_up_ = true; - if (input_img_cuda_buffer_host_ == nullptr) { - // prepare input data cache in GPU pinned memory - CUDA_CHECK(cudaMallocHost((void**)&input_img_cuda_buffer_host_, - max_image_size * 3)); - // prepare input data cache in GPU device memory - CUDA_CHECK( - cudaMalloc((void**)&input_img_cuda_buffer_device_, max_image_size * 3)); - CUDA_CHECK(cudaMalloc((void**)&input_tensor_cuda_buffer_device_, - 3 * size_[0] * size_[1] * sizeof(float))); - } -#else - FDWARNING << "The FastDeploy didn't compile with BUILD_CUDA_SRC=ON." 
- << std::endl; - use_cuda_preprocessing_ = false; -#endif -} - -bool YOLOv5::CudaPreprocess( - Mat* mat, FDTensor* output, - std::map>* im_info, - const std::vector& size, const std::vector padding_value, - bool is_mini_pad, bool is_no_pad, bool is_scale_up, int stride, - float max_wh, bool multi_label) { -#ifdef ENABLE_CUDA_PREPROCESS - if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { - FDERROR << "Preprocessing with CUDA is only available when the arguments " - "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." - << std::endl; - return false; - } - - // Record the shape of image and the shape of preprocessed image - (*im_info)["input_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - cudaStream_t stream = reinterpret_cast(cuda_stream_); - int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); - memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); - CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, - input_img_cuda_buffer_host_, src_img_buf_size, - cudaMemcpyHostToDevice, stream)); - utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), - mat->Height(), input_tensor_cuda_buffer_device_, - size[0], size[1], padding_value, stream); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(size[0]), - static_cast(size[1])}; - - output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, - input_tensor_cuda_buffer_device_); - output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -#else - FDERROR << "CUDA src code was not enabled." << std::endl; - return false; -#endif // ENABLE_CUDA_PREPROCESS -} - -bool YOLOv5::Postprocess( - std::vector& infer_results, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold, bool multi_label, - float max_wh) { - auto& infer_result = infer_results[0]; - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - if (multi_label) { - result->Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5)); - } else { - result->Reserve(infer_result.shape[1]); - } - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." 
<< std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - if (multi_label) { - for (size_t j = 5; j < infer_result.shape[2]; ++j) { - confidence = data[s + 4]; - float* class_score = data + s + j; - confidence *= (*class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, class_score); - - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - } else { - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - } - - if (result->boxes.size() == 0) { - return true; - } - - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - for (size_t i = 0; i < result->boxes.size(); ++i) { - float pad_h = (out_h - ipt_h * scale) / 2; - float pad_w = (out_w - ipt_w * scale) / 2; - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h); - } - return true; -} - -bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { - Mat mat(*im); - - std::map> im_info; - - if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info, size_, - padding_value_, is_mini_pad_, 
is_no_pad_, is_scale_up_, - stride_, max_wh_, multi_label_)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - } else { - if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info, size_, - padding_value_, is_mini_pad_, is_no_pad_, is_scale_up_, - stride_, max_wh_, multi_label_)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - } - - reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; - if (!Infer()) { - FDERROR << "Failed to inference." << std::endl; - return false; - } - - if (!Postprocess(reused_output_tensors_, result, im_info, conf_threshold, - nms_iou_threshold, multi_label_)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5.h deleted file mode 100644 index 4c46acd0a21..00000000000 --- a/fastdeploy/vision/detection/contrib/yolov5.h +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { -/*! @brief YOLOv5 model object used when to load a YOLOv5 model exported by YOLOv5. - */ -class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { - public: - /** \brief Set path of model file and the configuration of runtime. 
- * - * \param[in] model_file Path of model file, e.g ./yolov5.onnx - * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored - * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends" - * \param[in] model_format Model format of the loaded model, default is ONNX format - */ - YOLOv5(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::ONNX); - - ~YOLOv5(); - - std::string ModelName() const { return "yolov5"; } - /** \brief Predict the detection result for an input image - * - * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format - * \param[in] result The output detection result will be writen to this structure - * \param[in] conf_threshold confidence threashold for postprocessing, default is 0.25 - * \param[in] nms_iou_threshold iou threashold for NMS, default is 0.5 - * \return true if the prediction successed, otherwise false - */ - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - static bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info, - const std::vector& size = {640, 640}, - const std::vector padding_value = {114.0, 114.0, - 114.0}, - bool is_mini_pad = false, bool is_no_pad = false, - bool is_scale_up = false, int stride = 32, - float max_wh = 7680.0, bool multi_label = true); - - void UseCudaPreprocessing(int max_img_size = 3840 * 2160); - - bool CudaPreprocess(Mat* mat, FDTensor* output, - std::map>* im_info, - const std::vector& size = {640, 640}, - const std::vector padding_value = {114.0, 114.0, - 114.0}, - bool is_mini_pad = false, bool is_no_pad = false, - bool is_scale_up = false, int stride = 32, - float max_wh = 7680.0, bool multi_label = true); - - static bool Postprocess( - std::vector& infer_results, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold, bool multi_label, - float max_wh = 7680.0); - - /*! @brief - Argument for image preprocessing step, tuple of (width, height), decide the target size after resize, default size = {640, 640} - */ - std::vector size_; - // padding value, size should be the same as channels - - std::vector padding_value_; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad_; - // while is_mini_pad = false and is_no_pad = true, - // will resize the image to the set size - bool is_no_pad_; - // if is_scale_up is false, the input image only can be zoom out, - // the maximum resize scale cannot exceed 1.0 - bool is_scale_up_; - // padding stride, for is_mini_pad - int stride_; - // for offseting the boxes by classes when using NMS - float max_wh_; - /*! @brief - Argument for image preprocessing step, for different strategies to get boxes when postprocessing, default true - */ - bool multi_label_; - - private: - bool Initialize(); - - bool IsDynamicInput() const { return is_dynamic_input_; } - - static void LetterBox(Mat* mat, std::vector size, - std::vector color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // YOLOv5 official 'export_onnx.py' script will export dynamic ONNX by - // default. 
- // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; - // CUDA host buffer for input image - uint8_t* input_img_cuda_buffer_host_ = nullptr; - // CUDA device buffer for input image - uint8_t* input_img_cuda_buffer_device_ = nullptr; - // CUDA device buffer for TRT input tensor - float* input_tensor_cuda_buffer_device_ = nullptr; - // Whether to use CUDA preprocessing - bool use_cuda_preprocessing_ = false; - // CUDA stream - void* cuda_stream_ = nullptr; -}; - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index efc5157c117..d093abb1826 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "fastdeploy/vision/classification/ppcls/postprocessor.h" -#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index c40624820b6..166d956c2d7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -15,6 +15,7 @@ #pragma once #include "fastdeploy/vision/common/processors/transform.h" #include "fastdeploy/vision/common/result.h" +#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { @@ -45,7 +46,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { } /// Get conf_threshold, default 0.25 - void GetConfThreshold() const { return conf_threshold_; } + float GetConfThreshold() const { return conf_threshold_; } /// Set nms_threshold, default 0.5 void SetNMSThreshold(float nms_threshold) { @@ -53,7 +54,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { } /// Get nms_threshold, default 0.5 - void GetNMSThreshold() const { return nms_threshold_; } + float GetNMSThreshold() const { return nms_threshold_; } /// Set multi_label, default true void SetMultiLabel(bool multi_label) { @@ -61,7 +62,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { } /// Get multi_label, default true - void GetMultiLabel() const { return multi_label_; } + bool GetMultiLabel() const { return multi_label_; } private: bool Postprocess(const std::vector& tensors, diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 61de32d2770..1c1f1595a12 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -14,9 +14,6 @@ #include "fastdeploy/vision/detection/contrib/yolov5/preprocessor.h" #include "fastdeploy/function/concat.h" -#ifdef ENABLE_CUDA_PREPROCESS -#include "fastdeploy/vision/utils/cuda_utils.h" -#endif // ENABLE_CUDA_PREPROCESS namespace fastdeploy { namespace vision { diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index b57af35909f..e1f389c5024 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -15,6 +15,9 @@ #pragma once #include "fastdeploy/vision/common/processors/transform.h" #include 
"fastdeploy/vision/common/result.h" +#ifdef ENABLE_CUDA_PREPROCESS +#include "fastdeploy/vision/utils/cuda_utils.h" +#endif // ENABLE_CUDA_PREPROCESS namespace fastdeploy { namespace vision { @@ -42,7 +45,7 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { void SetSize(std::vector size) { size_ = size; } /// Get target size, tuple of (width, height), default size = {640, 640} - void GetSize() const { return size_; } + std::vector GetSize() const { return size_; } /// Set padding value, size should be the same as channels void SetPaddingValue(std::vector padding_value) { @@ -50,7 +53,7 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { } /// Get padding value, size should be the same as channels - void GetPaddingValue() const { return padding_value_; } + std::vector GetPaddingValue() const { return padding_value_; } /// Use Cuda Preprocess void UseCudaPreprocessing(int max_img_size = 3840 * 2160); diff --git a/fastdeploy/vision/detection/contrib/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5_pybind.cc deleted file mode 100644 index 52d0d78c9b2..00000000000 --- a/fastdeploy/vision/detection/contrib/yolov5_pybind.cc +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv5(pybind11::module& m) { - pybind11::class_(m, "YOLOv5") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOv5& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def("use_cuda_preprocessing", - [](vision::detection::YOLOv5& self, int max_image_size) { - self.UseCudaPreprocessing(max_image_size); - }) - .def_static("preprocess", - [](pybind11::array& data, const std::vector& size, - const std::vector padding_value, bool is_mini_pad, - bool is_no_pad, bool is_scale_up, int stride, float max_wh, - bool multi_label) { - auto mat = PyArrayToCvMat(data); - fastdeploy::vision::Mat fd_mat(mat); - FDTensor output; - std::map> im_info; - vision::detection::YOLOv5::Preprocess( - &fd_mat, &output, &im_info, size, padding_value, - is_mini_pad, is_no_pad, is_scale_up, stride, max_wh, - multi_label); - return make_pair(TensorToPyArray(output), im_info); - }) - .def_static( - "postprocess", - [](std::vector infer_results, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold, bool multi_label, - float max_wh) { - std::vector fd_infer_results(infer_results.size()); - PyArrayToTensorList(infer_results, &fd_infer_results, true); - vision::DetectionResult result; - vision::detection::YOLOv5::Postprocess( - fd_infer_results, &result, im_info, conf_threshold, - nms_iou_threshold, multi_label, max_wh); - return result; - }) - .def_readwrite("size", &vision::detection::YOLOv5::size_) - .def_readwrite("padding_value", - &vision::detection::YOLOv5::padding_value_) - .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad_) - .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad_) - .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up_) - .def_readwrite("stride", &vision::detection::YOLOv5::stride_) - .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh_) - .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label_); -} -} // namespace fastdeploy From 983301b410220e56fdaadb48f27b71d08d2bb12e Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 09:24:20 +0000 Subject: [PATCH 21/50] fixed bugs --- .../detection/contrib/yolov5/postprocessor.cc | 56 +++++++++---------- .../vision/detection/contrib/yolov5/yolov5.cc | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index d093abb1826..5a8d9d6e2da 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "fastdeploy/vision/classification/ppcls/postprocessor.h" +#include "fastdeploy/vision/detection/contrib/yolov5/postprocessor.h" namespace fastdeploy { namespace vision { -namespace classification { +namespace detection { YOLOv5Postprocessor::YOLOv5Postprocessor() { conf_threshold_ = 0.25; @@ -31,11 +31,11 @@ bool YOLOv5Postprocessor::Postprocess( const std::map>& im_info) { auto& infer_result = infer_results[0]; for (size_t bs = 0; bs < results->size(); ++bs) { - *results[bs].Clear(); + (*results)[bs].Clear(); if (multi_label_) { - *results[bs].Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5)); + (*results)[bs].Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5)); } else { - *results[bs].Reserve(infer_result.shape[1]); + (*results)[bs].Reserve(infer_result.shape[1]); } if (infer_result.dtype != FDDataType::FP32) { FDERROR << "Only support post process with float32 data." << std::endl; @@ -57,13 +57,13 @@ bool YOLOv5Postprocessor::Postprocess( int32_t label_id = std::distance(data + s + 5, class_score); // convert from [x, y, w, h] to [x1, y1, x2, y2] - *results[bs].boxes.emplace_back(std::array{ + (*results)[bs].boxes.emplace_back(std::array{ data[s] - data[s + 2] / 2.0f + label_id * max_wh_, data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); - *results[bs].label_ids.push_back(label_id); - *results[bs].scores.push_back(confidence); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); } } else { float* max_class_score = @@ -75,21 +75,21 @@ bool YOLOv5Postprocessor::Postprocess( } int32_t label_id = std::distance(data + s + 5, max_class_score); // convert from [x, y, w, h] to [x1, y1, x2, y2] - *results[bs].boxes.emplace_back(std::array{ + (*results)[bs].boxes.emplace_back(std::array{ data[s] - data[s + 2] / 2.0f + label_id * max_wh_, data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); - *results[bs].label_ids.push_back(label_id); - *results[bs].scores.push_back(confidence); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); } } - if (*results[bs].boxes.size() == 0) { + if ((*results)[bs].boxes.size() == 0) { return true; } - utils::NMS(&(*results[bs]), nms_threshold_); + utils::NMS(&((*results)[bs]), nms_threshold_); // scale the boxes to the origin image shape auto iter_out = im_info.find("output_shape"); @@ -101,23 +101,23 @@ bool YOLOv5Postprocessor::Postprocess( float ipt_h = iter_ipt->second[0]; float ipt_w = iter_ipt->second[1]; float scale = std::min(out_h / ipt_h, out_w / ipt_w); - for (size_t i = 0; i < *results[bs].boxes.size(); ++i) { + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { float pad_h = (out_h - ipt_h * scale) / 2; float pad_w = (out_w - ipt_w * scale) / 2; - int32_t label_id = (*results[bs].label_ids)[i]; + int32_t label_id = ((*results)[bs].label_ids)[i]; // clip box - *results[bs].boxes[i][0] = *results[bs].boxes[i][0] - max_wh * label_id; - *results[bs].boxes[i][1] = *results[bs].boxes[i][1] - max_wh * label_id; - *results[bs].boxes[i][2] = *results[bs].boxes[i][2] - max_wh * label_id; - *results[bs].boxes[i][3] = *results[bs].boxes[i][3] - max_wh * label_id; - *results[bs].boxes[i][0] = std::max((*results[bs].boxes[i][0] - pad_w) / scale, 0.0f); - *results[bs].boxes[i][1] = std::max((*results[bs].boxes[i][1] - 
pad_h) / scale, 0.0f); - *results[bs].boxes[i][2] = std::max((*results[bs].boxes[i][2] - pad_w) / scale, 0.0f); - *results[bs].boxes[i][3] = std::max((*results[bs].boxes[i][3] - pad_h) / scale, 0.0f); - *results[bs].boxes[i][0] = std::min(*results[bs].boxes[i][0], ipt_w); - *results[bs].boxes[i][1] = std::min(*results[bs].boxes[i][1], ipt_h); - *results[bs].boxes[i][2] = std::min(*results[bs].boxes[i][2], ipt_w); - *results[bs].boxes[i][3] = std::min(*results[bs].boxes[i][3], ipt_h); + (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh * label_id; + (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh * label_id; + (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh * label_id; + (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh * label_id; + (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h); } } return true; @@ -139,6 +139,6 @@ bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector< return false; } return true; -} // namespace classification +} // namespace detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index e10ceb9ed2c..7d19d6418af 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
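The decode above relies on a common trick: boxes are shifted by `label_id * max_wh_` before NMS so that boxes of different classes can never overlap, which makes a single class-agnostic NMS pass behave per class; the offset is subtracted again in the clip-box step before the letterbox padding and scale are undone. A standalone toy illustration (not FastDeploy code):

```cpp
// Toy illustration of the class-offset trick used above: shifting each box
// by label_id * max_wh puts different classes on disjoint coordinate ranges,
// so one class-agnostic NMS pass behaves like per-class NMS.
#include <array>
#include <cstdio>

int main() {
  const float max_wh = 7680.0f;
  // Two identical boxes with different labels...
  std::array<float, 4> box_a = {10.f, 10.f, 50.f, 50.f};  // label 0
  std::array<float, 4> box_b = {10.f, 10.f, 50.f, 50.f};  // label 1
  // ...stop overlapping once shifted:
  for (auto& v : box_b) v += 1 * max_wh;
  // IoU(box_a, box_b) is now 0, so NMS keeps both; afterwards the offset
  // is removed exactly as in the clip-box step above.
  for (auto& v : box_b) v -= 1 * max_wh;
  std::printf("box_b restored: %.0f %.0f %.0f %.0f\n",
              box_b[0], box_b[1], box_b[2], box_b[3]);
  return 0;
}
```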
-#include "fastdeploy/vision/detection/contrib/yolov5.h" +#include "fastdeploy/vision/detection/contrib/yolov5/yolov5.h" namespace fastdeploy { namespace vision { From 27113e3bc68cee6e98888447fd2e1eb110e08bbc Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 11:31:49 +0000 Subject: [PATCH 22/50] rm cuda preprocess --- fastdeploy/vision.h | 2 +- .../detection/contrib/yolov5/postprocessor.cc | 1 + .../detection/contrib/yolov5/postprocessor.h | 1 - .../detection/contrib/yolov5/preprocessor.cc | 69 +------------------ .../detection/contrib/yolov5/preprocessor.h | 19 ----- .../vision/detection/contrib/yolov5/yolov5.cc | 18 ----- .../detection/contrib/yolov5/yolov5_pybind.cc | 4 -- .../vision/detection/contrib/yolov5.py | 7 -- 8 files changed, 3 insertions(+), 118 deletions(-) diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h index 44054ee9372..15cc1d009db 100755 --- a/fastdeploy/vision.h +++ b/fastdeploy/vision.h @@ -21,7 +21,7 @@ #include "fastdeploy/vision/detection/contrib/nanodet_plus.h" #include "fastdeploy/vision/detection/contrib/scaledyolov4.h" #include "fastdeploy/vision/detection/contrib/yolor.h" -#include "fastdeploy/vision/detection/contrib/yolov5.h" +#include "fastdeploy/vision/detection/contrib/yolov5/yolov5.h" #include "fastdeploy/vision/detection/contrib/yolov5lite.h" #include "fastdeploy/vision/detection/contrib/yolov6.h" #include "fastdeploy/vision/detection/contrib/yolov7.h" diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 5a8d9d6e2da..9156093aad7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "fastdeploy/vision/detection/contrib/yolov5/postprocessor.h" +#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index 166d956c2d7..81d82c9207c 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -15,7 +15,6 @@ #pragma once #include "fastdeploy/vision/common/processors/transform.h" #include "fastdeploy/vision/common/result.h" -#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 1c1f1595a12..031b4e152e7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -103,66 +103,6 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, return true; } -void YOLOv5Preprocessor::UseCudaPreprocessing(int max_image_size) { -#ifdef ENABLE_CUDA_PREPROCESS - use_cuda_preprocessing_ = true; - is_scale_up_ = true; - if (input_img_cuda_buffer_host_ == nullptr) { - // prepare input data cache in GPU pinned memory - CUDA_CHECK(cudaMallocHost((void**)&input_img_cuda_buffer_host_, - max_image_size * 3)); - // prepare input data cache in GPU device memory - CUDA_CHECK( - cudaMalloc((void**)&input_img_cuda_buffer_device_, max_image_size * 3)); - CUDA_CHECK(cudaMalloc((void**)&input_tensor_cuda_buffer_device_, - 3 * size_[0] * size_[1] * sizeof(float))); - } -#else - FDWARNING << "The FastDeploy didn't compile with BUILD_CUDA_SRC=ON." 
- << std::endl; - use_cuda_preprocessing_ = false; -#endif -} - -bool YOLOv5Preprocessor::CudaPreprocess(FDMat* mat, FDTensor* output, - std::map>* im_info) { -#ifdef ENABLE_CUDA_PREPROCESS - if (is_mini_pad_ != false || is_no_pad_ != false || is_scale_up_ != true) { - FDERROR << "Preprocessing with CUDA is only available when the arguments " - "satisfy (is_mini_pad_=false, is_no_pad_=false, is_scale_up_=true)." - << std::endl; - return false; - } - - // Record the shape of image and the shape of preprocessed image - (*im_info)["input_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - cudaStream_t stream = reinterpret_cast(cuda_stream_); - int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); - memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); - CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, - input_img_cuda_buffer_host_, src_img_buf_size, - cudaMemcpyHostToDevice, stream)); - utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), - mat->Height(), input_tensor_cuda_buffer_device_, - size_[0], size_[1], padding_value_, stream); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(size_[0]), - static_cast(size_[1])}; - - output->SetExternalData({mat->Channels(), size_[0], size_[1]}, FDDataType::FP32, - input_tensor_cuda_buffer_device_); - output->device = Device::GPU; - output->ExpandDim(0); // reshape to n, h, w, c - return true; -#else - FDERROR << "CUDA src code was not enabled." << std::endl; - return false; -#endif // ENABLE_CUDA_PREPROCESS -} - bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* outputs, std::map>* im_info) { if (!initialized_) { @@ -177,16 +117,9 @@ bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* // Concat all the preprocessed data to a batch tensor std::vector tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { - if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&(*images)[i], &tensors[i], im_info)) { + if (!Preprocess(&(*images)[i], &tensors[i], im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; - } - } else { - if (!Preprocess(&(*images)[i], &tensors[i], im_info)) { - FDERROR << "Failed to preprocess input image." 
<< std::endl; - return false; - } } } diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index e1f389c5024..634531681a1 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -15,9 +15,6 @@ #pragma once #include "fastdeploy/vision/common/processors/transform.h" #include "fastdeploy/vision/common/result.h" -#ifdef ENABLE_CUDA_PREPROCESS -#include "fastdeploy/vision/utils/cuda_utils.h" -#endif // ENABLE_CUDA_PREPROCESS namespace fastdeploy { namespace vision { @@ -55,16 +52,10 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { /// Get padding value, size should be the same as channels std::vector GetPaddingValue() const { return padding_value_; } - /// Use Cuda Preprocess - void UseCudaPreprocessing(int max_img_size = 3840 * 2160); - private: bool Preprocess(FDMat* mat, FDTensor* output, std::map>* im_info); - bool CudaPreprocess(FDMat* mat, FDTensor* output, - std::map>* im_info); - bool IsDynamicInput() const { return is_dynamic_input_; } void LetterBox(FDMat* mat); @@ -101,16 +92,6 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { // value will // auto check by fastdeploy after the internal Runtime already initialized. bool is_dynamic_input_; - // CUDA host buffer for input image - uint8_t* input_img_cuda_buffer_host_ = nullptr; - // CUDA device buffer for input image - uint8_t* input_img_cuda_buffer_device_ = nullptr; - // CUDA device buffer for TRT input tensor - float* input_tensor_cuda_buffer_device_ = nullptr; - // Whether to use CUDA preprocessing - bool use_cuda_preprocessing_ = false; - // CUDA stream - void* cuda_stream_ = nullptr; }; } // namespace detection diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index 7d19d6418af..cc2cf083af4 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -32,13 +32,6 @@ YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file, runtime_option.model_format = model_format; runtime_option.model_file = model_file; runtime_option.params_file = params_file; -#ifdef ENABLE_CUDA_PREPROCESS - cudaSetDevice(runtime_option.device_id); - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - cuda_stream_ = reinterpret_cast(stream); - runtime_option.SetExternalStream(cuda_stream_); -#endif // ENABLE_CUDA_PREPROCESS initialized = Initialize(); } @@ -50,17 +43,6 @@ bool YOLOv5::Initialize() { return true; } -YOLOv5::~YOLOv5() { -#ifdef ENABLE_CUDA_PREPROCESS - if (use_cuda_preprocessing_) { - CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); - CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); - CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); - CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); - } -#endif // ENABLE_CUDA_PREPROCESS -} - bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, float nms_threshold) { postprocessor_.SetConfThreshold(conf_threshold); postprocessor_.SetNMSThreshold(nms_threshold); diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index 21bfd830fd4..dc6b7d1d069 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -31,10 +31,6 @@ void BindYOLOv5(pybind11::module& m) { } return 
make_pair(outputs, im_info);
       })
-      .def("use_cuda_preprocessing",
-           [](vision::detection::YOLOv5Preprocessor& self, int max_image_size) {
-             self.UseCudaPreprocessing(max_image_size);
-           })
       .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize,
                     &vision::detection::YOLOv5Preprocessor::SetSize)
       .def_property("padding_value",
                     &vision::detection::YOLOv5Preprocessor::GetPaddingValue,
                     &vision::detection::YOLOv5Preprocessor::SetPaddingValue);
diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py
index e0b5138becf..a7fb904c952 100644
--- a/python/fastdeploy/vision/detection/contrib/yolov5.py
+++ b/python/fastdeploy/vision/detection/contrib/yolov5.py
@@ -32,13 +32,6 @@ def run(self, input_ims):
         """
         return self._preprocessor.run(input_ims)
 
-    def use_cuda_preprocessing(self, max_image_size):
-        """Preprocess input images by CUDA
-
-        :param max_image_size: (int)Set max_image_size
-        """
-        return self._preprocessor.use_cuda_preprocessing(max_image_size)
-
     @property
     def size(self):
         """

From f7d9d20d16dd11c16404613e4b28e82279e8bc6b Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Thu, 10 Nov 2022 11:42:32 +0000
Subject: [PATCH 23/50] fixed bugs

---
 .../detection/contrib/yolov5/postprocessor.cc | 29 ++++++++++---------
 .../vision/detection/contrib/yolov5/yolov5.cc |  2 +-
 2 files changed, 16 insertions(+), 15 deletions(-)
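The first hunk of this commit fixes an operator-precedence bug: `*results[bs]` parses as `*(results[bs])` because `operator[]` binds tighter than unary `*`, so the earlier code indexed the pointer itself instead of the pointed-to vector. A standalone illustration with toy types (not FastDeploy code):

```cpp
// Toy illustration of the precedence bug fixed in this commit: [] binds
// tighter than unary *, so *results[bs] dereferences the wrong thing.
#include <cstdio>
#include <vector>

int main() {
  std::vector<std::vector<int>> storage = {{1}, {2}, {3}};
  std::vector<std::vector<int>>* results = &storage;
  size_t bs = 1;
  // *results[bs] would mean *(results[bs]): it treats `results` as an array
  // of vectors and indexes past the single object -- undefined behavior.
  // The fix is to dereference first, then index:
  (*results)[bs].push_back(42);
  std::printf("%d %d\n", (*results)[bs][0], (*results)[bs][1]);  // prints: 2 42
  return 0;
}
```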
<< std::endl; return false; } - float* data = static_cast(infer_result.Data()) + bs * infer_result.shape[1] * infer_result.shape[2]; - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; + float* data = reinterpret_cast(infer_results[0].Data()) + bs * infer_results[0].shape[1] * infer_results[0].shape[2]; + for (size_t i = 0; i < infer_results[0].shape[1]; ++i) { + int s = i * infer_results[0].shape[2]; float confidence = data[s + 4]; if (multi_label_) { - for (size_t j = 5; j < infer_result.shape[2]; ++j) { + for (size_t j = 5; j < infer_results[0].shape[2]; ++j) { confidence = data[s + 4]; float* class_score = data + s + j; confidence *= (*class_score); @@ -68,7 +67,7 @@ bool YOLOv5Postprocessor::Postprocess( } } else { float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); + std::max_element(data + s + 5, data + s + infer_results[0].shape[2]); confidence *= (*max_class_score); // filter boxes by conf_threshold if (confidence <= conf_threshold_) { @@ -107,10 +106,10 @@ bool YOLOv5Postprocessor::Postprocess( float pad_w = (out_w - ipt_w * scale) / 2; int32_t label_id = ((*results)[bs].label_ids)[i]; // clip box - (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh * label_id; - (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh * label_id; - (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh * label_id; - (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh * label_id; + (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh_ * label_id; (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); (*results)[bs].boxes[i][2] = std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); @@ -140,6 +139,8 @@ bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector< return false; } return true; +} + } // namespace detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index cc2cf083af4..b0df6b4e955 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -76,7 +76,7 @@ bool YOLOv5::BatchPredict(const std::vector& images, std::vector Date: Thu, 10 Nov 2022 11:45:57 +0000 Subject: [PATCH 24/50] fixed bugs --- fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 9104565a375..5f0cb9a6f7c 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -41,14 +41,14 @@ bool YOLOv5Postprocessor::Postprocess( FDERROR << "Only support post process with float32 data." 
<< std::endl; return false; } - float* data = reinterpret_cast(infer_results[0].Data()) + bs * infer_results[0].shape[1] * infer_results[0].shape[2]; + const float* data = reinterpret_cast(infer_results[0].Data()) + bs * infer_results[0].shape[1] * infer_results[0].shape[2]; for (size_t i = 0; i < infer_results[0].shape[1]; ++i) { int s = i * infer_results[0].shape[2]; float confidence = data[s + 4]; if (multi_label_) { for (size_t j = 5; j < infer_results[0].shape[2]; ++j) { confidence = data[s + 4]; - float* class_score = data + s + j; + const float* class_score = data + s + j; confidence *= (*class_score); // filter boxes by conf_threshold if (confidence <= conf_threshold_) { @@ -66,7 +66,7 @@ bool YOLOv5Postprocessor::Postprocess( (*results)[bs].scores.push_back(confidence); } } else { - float* max_class_score = + const float* max_class_score = std::max_element(data + s + 5, data + s + infer_results[0].shape[2]); confidence *= (*max_class_score); // filter boxes by conf_threshold From 0a5611b8121ace5073132ef5fe2dfbb3d382c7c9 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 11:49:36 +0000 Subject: [PATCH 25/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index dc6b7d1d069..9cd2824d9ea 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -50,7 +50,7 @@ void BindYOLOv5(pybind11::module& m) { std::vector results; std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); - if (!self.Run(inputs, &results)) { + if (!self.Run(inputs, &results, im_info)) { pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')"); } return results; From 5706ff621d25dfdf0bf6be3bc4ef6e05b10b0e2c Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 12:01:20 +0000 Subject: [PATCH 26/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/yolov5.h | 4 ++-- python/fastdeploy/vision/detection/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 python/fastdeploy/vision/detection/__init__.py diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h index 78621398881..09de9c45ee2 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h @@ -40,7 +40,7 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { std::string ModelName() const { return "yolov5"; } - /** \brief Predict the detection result for an input image + /** \brief DEPRECATED Predict the detection result for an input image, remove at 1.0 version * * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format * \param[in] result The output detection result will be writen to this structure @@ -63,7 +63,7 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { /** \brief Predict the detection results for a batch of input images * * \param[in] imgs, The input image list, each element comes from cv::imread() - * \param[in] results The output classification result list + * \param[in] results The output detection result list * \return true if the prediction successed, otherwise false */ virtual bool BatchPredict(const std::vector& imgs, diff 
--git a/python/fastdeploy/vision/detection/__init__.py b/python/fastdeploy/vision/detection/__init__.py old mode 100644 new mode 100755 index a4fe4c035be..47d175af755 --- a/python/fastdeploy/vision/detection/__init__.py +++ b/python/fastdeploy/vision/detection/__init__.py @@ -18,7 +18,7 @@ from .contrib.scaled_yolov4 import ScaledYOLOv4 from .contrib.nanodet_plus import NanoDetPlus from .contrib.yolox import YOLOX -from .contrib.yolov5 import YOLOv5 +from .contrib.yolov5 import * from .contrib.yolov5lite import YOLOv5Lite from .contrib.yolov6 import YOLOv6 from .contrib.yolov7end2end_trt import YOLOv7End2EndTRT From 58175642f98db91e75e94fd6cc2bda92034eacb0 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 12:25:25 +0000 Subject: [PATCH 27/50] fix pybind --- fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc | 2 +- fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 5f0cb9a6f7c..de6a8de6762 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -23,8 +23,8 @@ YOLOv5Postprocessor::YOLOv5Postprocessor() { conf_threshold_ = 0.25; nms_threshold_ = 0.5; multi_label_ = true; - initialized_ = true; max_wh_ = 7680.0; + initialized_ = true; } bool YOLOv5Postprocessor::Postprocess( diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index 9cd2824d9ea..d9758e07ebc 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -18,7 +18,7 @@ namespace fastdeploy { void BindYOLOv5(pybind11::module& m) { pybind11::class_( m, "YOLOv5Preprocessor") - .def(pybind11::init()) + .def(pybind11::init<>()) .def("run", [](vision::detection::YOLOv5Preprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { @@ -36,7 +36,7 @@ void BindYOLOv5(pybind11::module& m) { pybind11::class_( m, "YOLOv5Postprocessor") - .def(pybind11::init()) + .def(pybind11::init<>()) .def("run", [](vision::detection::YOLOv5Postprocessor& self, std::vector& inputs, const std::map>& im_info) { std::vector results; From 3e94507fef0b117509ac8b6df4fc5c763e8a824e Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 12:54:54 +0000 Subject: [PATCH 28/50] rm useless code --- fastdeploy/vision/detection/contrib/yolov5/yolov5.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h index 09de9c45ee2..53bcfce755d 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h @@ -36,8 +36,6 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - ~YOLOv5(); - std::string ModelName() const { return "yolov5"; } /** \brief DEPRECATED Predict the detection result for an input image, remove at 1.0 version From b97542fe1f38d7725268f3aa29eec0bbb2ea388d Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sun, 13 Nov 2022 05:31:57 +0000 Subject: [PATCH 29/50] add convert_and_permute --- .../common/processors/convert_and_permute.cc | 94 +++++++++++++++++++ 
 .../common/processors/convert_and_permute.h   |  66 ++++++++++++++
 .../vision/common/processors/transform.h      |   1 +
 .../detection/contrib/yolov5/postprocessor.cc |  51 ++++------
 .../detection/contrib/yolov5/postprocessor.h  |  13 +--
 .../detection/contrib/yolov5/preprocessor.cc  |  10 +-
 .../detection/contrib/yolov5/preprocessor.h   |  18 +---
 7 files changed, 190 insertions(+), 63 deletions(-)
 create mode 100644 fastdeploy/vision/common/processors/convert_and_permute.cc
 create mode 100644 fastdeploy/vision/common/processors/convert_and_permute.h
 mode change 100644 => 100755 fastdeploy/vision/common/processors/transform.h

diff --git a/fastdeploy/vision/common/processors/convert_and_permute.cc b/fastdeploy/vision/common/processors/convert_and_permute.cc
new file mode 100644
index 00000000000..042e9a13962
--- /dev/null
+++ b/fastdeploy/vision/common/processors/convert_and_permute.cc
@@ -0,0 +1,94 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/common/processors/convert_and_permute.h"
+
+namespace fastdeploy {
+namespace vision {
+
+ConvertAndPermute::ConvertAndPermute(const std::vector<float>& alpha,
+                                     const std::vector<float>& beta,
+                                     bool swap_rb) {
+  FDASSERT(alpha.size() == beta.size(),
+           "Convert: requires the size of alpha equal to the size of beta.");
+  FDASSERT(alpha.size() > 0 && beta.size() > 0,
+           "Convert: requires the size of alpha and beta > 0.");
+  alpha_.assign(alpha.begin(), alpha.end());
+  beta_.assign(beta.begin(), beta.end());
+  swap_rb_ = swap_rb;
+}
+
+bool ConvertAndPermute::ImplByOpenCV(FDMat* mat) {
+  cv::Mat* im = mat->GetOpenCVMat();
+  int origin_w = im->cols;
+  int origin_h = im->rows;
+  std::vector<cv::Mat> split_im;
+  cv::split(*im, split_im);
+  if (swap_rb_) std::swap(split_im[0], split_im[2]);
+  for (int c = 0; c < im->channels(); c++) {
+    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
+  }
+  cv::Mat res(origin_h, origin_w, CV_32FC(im->channels()));
+  for (int i = 0; i < im->channels(); ++i) {
+    cv::extractChannel(split_im[i],
+                       cv::Mat(origin_h, origin_w, CV_32FC1,
+                               res.ptr() + i * origin_h * origin_w * 4),
+                       0);
+  }
+
+  mat->SetMat(res);
+  mat->layout = Layout::CHW;
+  return true;
+}
+
+#ifdef ENABLE_FLYCV
+bool ConvertAndPermute::ImplByFlyCV(FDMat* mat) {
+  if (mat->layout != Layout::HWC) {
+    FDERROR << "Only supports input with HWC layout." << std::endl;
+    return false;
+  }
+  fcv::Mat* im = mat->GetFlyCVMat();
+  if (im->channels() != 3) {
+    FDERROR << "Only supports 3-channels image in FlyCV, but now it's "
+            << im->channels() << "." << std::endl;
+    return false;
+  }
+  std::vector<float> mean(3, 0);
+  std::vector<float> std(3, 0);
+  for (size_t i = 0; i < 3; ++i) {
+    std[i] = 1.0 / alpha_[i];
+    mean[i] = -1 * beta_[i] * std[i];
+  }
+
+  std::vector<uint32_t> channel_reorder_index = {0, 1, 2};
+  if (swap_rb_) std::swap(channel_reorder_index[0], channel_reorder_index[2]);
+
+  fcv::Mat new_im;
+  fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index,
+                                       new_im, false);
+  mat->SetMat(new_im);
+  mat->layout = Layout::CHW;
+  return true;
+}
+#endif
+
+bool ConvertAndPermute::Run(FDMat* mat, const std::vector<float>& alpha,
+                            const std::vector<float>& beta, bool swap_rb,
+                            ProcLib lib) {
+  auto n = ConvertAndPermute(alpha, beta, swap_rb);
+  return n(mat, lib);
+}
+
+}  // namespace vision
+}  // namespace fastdeploy
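For orientation between the two new files: this processor fuses what the YOLOv5 preprocessor previously did in four steps (BGR2RGB, Convert, HWC2CHW, Cast) into a single `swap_rb -> x * alpha + beta -> CHW` pass. A minimal standalone sketch of calling it (not part of the patch; the `mat.h` include path and the single-image `WrapMat` helper are assumptions inferred from surrounding code):

```cpp
// Illustrative sketch only, against the ConvertAndPermute API added above.
#include <opencv2/opencv.hpp>
#include <vector>

#include "fastdeploy/vision/common/mat.h"  // assumed include path for FDMat/WrapMat
#include "fastdeploy/vision/common/processors/convert_and_permute.h"

int main() {
  cv::Mat bgr = cv::imread("test.jpg");
  fastdeploy::vision::FDMat mat = fastdeploy::vision::WrapMat(bgr);
  // Scale to [0, 1], swap BGR -> RGB, and emit float CHW in one pass;
  // equivalent to BGR2RGB::Run + Convert::Run + HWC2CHW::Run + Cast::Run.
  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
  bool ok = fastdeploy::vision::ConvertAndPermute::Run(&mat, alpha, beta,
                                                       /*swap_rb=*/true);
  return ok ? 0 : 1;
}
```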
diff --git a/fastdeploy/vision/common/processors/convert_and_permute.h b/fastdeploy/vision/common/processors/convert_and_permute.h
new file mode 100644
index 00000000000..d4fc5da1213
--- /dev/null
+++ b/fastdeploy/vision/common/processors/convert_and_permute.h
@@ -0,0 +1,66 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/vision/common/processors/base.h"
+
+namespace fastdeploy {
+namespace vision {
+class FASTDEPLOY_DECL ConvertAndPermute : public Processor {
+ public:
+  ConvertAndPermute(const std::vector<float>& alpha = std::vector<float>(),
+                    const std::vector<float>& beta = std::vector<float>(),
+                    bool swap_rb = false);
+  bool ImplByOpenCV(FDMat* mat);
+#ifdef ENABLE_FLYCV
+  bool ImplByFlyCV(FDMat* mat);
+#endif
+  std::string Name() { return "ConvertAndPermute"; }
+
+  static bool Run(FDMat* mat, const std::vector<float>& alpha,
+                  const std::vector<float>& beta, bool swap_rb = false,
+                  ProcLib lib = ProcLib::DEFAULT);
+
+  std::vector<float> GetAlpha() const { return alpha_; }
+
+  void SetAlpha(const std::vector<float>& alpha) {
+    alpha_.clear();
+    std::vector<float>().swap(alpha_);
+    alpha_.assign(alpha.begin(), alpha.end());
+  }
+
+  std::vector<float> GetBeta() const { return beta_; }
+
+  void SetBeta(const std::vector<float>& beta) {
+    beta_.clear();
+    std::vector<float>().swap(beta_);
+    beta_.assign(beta.begin(), beta.end());
+  }
+
+  bool GetSwapRB() {
+    return swap_rb_;
+  }
+
+  void SetSwapRB(const bool& swap_rb) {
+    swap_rb_ = swap_rb;
+  }
+
+ private:
+  std::vector<float> alpha_;
+  std::vector<float> beta_;
+  bool swap_rb_;
+};
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/transform.h b/fastdeploy/vision/common/processors/transform.h
old mode 100644
new mode 100755
index 7ba58167d46..e3f71463657
--- a/fastdeploy/vision/common/processors/transform.h
+++ b/fastdeploy/vision/common/processors/transform.h
@@ -18,6 +18,7 @@
 #include "fastdeploy/vision/common/processors/center_crop.h"
 #include "fastdeploy/vision/common/processors/color_space_convert.h"
 #include "fastdeploy/vision/common/processors/convert.h"
+#include "fastdeploy/vision/common/processors/convert_and_permute.h"
 #include "fastdeploy/vision/common/processors/crop.h"
 #include
"fastdeploy/vision/common/processors/hwc2chw.h" #include "fastdeploy/vision/common/processors/limit_by_stride.h" diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index de6a8de6762..6f36078a3f0 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -24,29 +24,36 @@ YOLOv5Postprocessor::YOLOv5Postprocessor() { nms_threshold_ = 0.5; multi_label_ = true; max_wh_ = 7680.0; - initialized_ = true; } -bool YOLOv5Postprocessor::Postprocess( - const std::vector& infer_results, std::vector* results, - const std::map>& im_info) { - for (size_t bs = 0; bs < results->size(); ++bs) { +bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector* results, + const std::map>& im_info) { + if (!initialized_) { + FDERROR << "Postprocessor is not initialized." << std::endl; + return false; + } + + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { (*results)[bs].Clear(); if (multi_label_) { - (*results)[bs].Reserve(infer_results[0].shape[1] * (infer_results[0].shape[2] - 5)); + (*results)[bs].Reserve(tensors[0].shape[1] * (tensors[0].shape[2] - 5)); } else { - (*results)[bs].Reserve(infer_results[0].shape[1]); + (*results)[bs].Reserve(tensors[0].shape[1]); } - if (infer_results[0].dtype != FDDataType::FP32) { + if (tensors[0].dtype != FDDataType::FP32) { FDERROR << "Only support post process with float32 data." << std::endl; return false; } - const float* data = reinterpret_cast(infer_results[0].Data()) + bs * infer_results[0].shape[1] * infer_results[0].shape[2]; - for (size_t i = 0; i < infer_results[0].shape[1]; ++i) { - int s = i * infer_results[0].shape[2]; + const float* data = reinterpret_cast(tensors[0].Data()) + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; float confidence = data[s + 4]; if (multi_label_) { - for (size_t j = 5; j < infer_results[0].shape[2]; ++j) { + for (size_t j = 5; j < tensors[0].shape[2]; ++j) { confidence = data[s + 4]; const float* class_score = data + s + j; confidence *= (*class_score); @@ -67,7 +74,7 @@ bool YOLOv5Postprocessor::Postprocess( } } else { const float* max_class_score = - std::max_element(data + s + 5, data + s + infer_results[0].shape[2]); + std::max_element(data + s + 5, data + s + tensors[0].shape[2]); confidence *= (*max_class_score); // filter boxes by conf_threshold if (confidence <= conf_threshold_) { @@ -123,24 +130,6 @@ bool YOLOv5Postprocessor::Postprocess( return true; } -bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector* results, - const std::map>& im_info) { - if (!initialized_) { - FDERROR << "Postprocessor is not initialized." << std::endl; - return false; - } - - int batch = tensors[0].shape[0]; - - results->resize(batch); - - if (!Postprocess(tensors, results, im_info)) { - FDERROR << "Failed to preprocess input image." 
<< std::endl; - return false; - } - return true; -} - } // namespace detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index 81d82c9207c..110a22feed7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -40,7 +40,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { const std::map>& im_info); /// Set conf_threshold, default 0.25 - void SetConfThreshold(float conf_threshold) { + void SetConfThreshold(const float& conf_threshold) { conf_threshold_ = conf_threshold; } @@ -48,7 +48,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { float GetConfThreshold() const { return conf_threshold_; } /// Set nms_threshold, default 0.5 - void SetNMSThreshold(float nms_threshold) { + void SetNMSThreshold(const float& nms_threshold) { nms_threshold_ = nms_threshold; } @@ -56,19 +56,14 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { float GetNMSThreshold() const { return nms_threshold_; } /// Set multi_label, default true - void SetMultiLabel(bool multi_label) { + void SetMultiLabel(const bool& multi_label) { multi_label_ = multi_label; } /// Get multi_label, default true bool GetMultiLabel() const { return multi_label_; } - private: - bool Postprocess(const std::vector& tensors, - std::vector* results, - const std::map>& im_info); - - bool initialized_ = false; + protected: float conf_threshold_; float nms_threshold_; bool multi_label_; diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 031b4e152e7..aa4ee9ae6c9 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -27,7 +27,6 @@ YOLOv5Preprocessor::YOLOv5Preprocessor() { is_scale_up_ = false; stride_ = 32; max_wh_ = 7680.0; - initialized_ = true; } void YOLOv5Preprocessor::LetterBox(FDMat* mat) { @@ -83,21 +82,16 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, } // yolov5's preprocess steps // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW + // 2. 
convert_and_permute(swap_rb=true) LetterBox(mat); - BGR2RGB::Run(mat); - // Compute `result = mat * alpha + beta` directly by channel std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); + ConvertAndPermute::Run(mat, alpha, beta, true); // Record output shape of preprocessed image (*im_info)["output_shape"] = {static_cast(mat->Height()), static_cast(mat->Width())}; - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); mat->ShareWithTensor(output); output->ExpandDim(0); // reshape to n, h, w, c return true; diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index 634531681a1..bb2eaaa8bb6 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -39,28 +39,25 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { std::map>* im_info); /// Set target size, tuple of (width, height), default size = {640, 640} - void SetSize(std::vector size) { size_ = size; } + void SetSize(const std::vector& size) { size_ = size; } /// Get target size, tuple of (width, height), default size = {640, 640} std::vector GetSize() const { return size_; } /// Set padding value, size should be the same as channels - void SetPaddingValue(std::vector padding_value) { + void SetPaddingValue(const std::vector& padding_value) { padding_value_ = padding_value; } /// Get padding value, size should be the same as channels std::vector GetPaddingValue() const { return padding_value_; } - private: + protected: bool Preprocess(FDMat* mat, FDTensor* output, std::map>* im_info); - bool IsDynamicInput() const { return is_dynamic_input_; } - void LetterBox(FDMat* mat); - bool initialized_ = false; // target size, tuple of (width, height), default size = {640, 640} std::vector size_; @@ -83,15 +80,6 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { // for offseting the boxes by classes when using NMS float max_wh_; - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // YOLOv5 official 'export_onnx.py' script will export dynamic ONNX by - // default. - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; }; } // namespace detection From d83fd12830671eb4380419ed7b5c6bef601a6dcf Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sun, 13 Nov 2022 05:48:50 +0000 Subject: [PATCH 30/50] fixed bugs --- fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc | 5 ----- fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc | 4 ---- 2 files changed, 9 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 6f36078a3f0..ef6fc48c52d 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -28,11 +28,6 @@ YOLOv5Postprocessor::YOLOv5Postprocessor() { bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector* results, const std::map>& im_info) { - if (!initialized_) { - FDERROR << "Postprocessor is not initialized." 
<< std::endl; - return false; - } - int batch = tensors[0].shape[0]; results->resize(batch); diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index aa4ee9ae6c9..d25fca76daf 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -99,10 +99,6 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* outputs, std::map>* im_info) { - if (!initialized_) { - FDERROR << "The preprocessor is not initialized." << std::endl; - return false; - } if (images->size() == 0) { FDERROR << "The size of input images should be greater than 0." << std::endl; return false; From 302ca01014cb5b12a3fc9eba3e6598091f605135 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sun, 13 Nov 2022 08:30:57 +0000 Subject: [PATCH 31/50] fixed im_info for bs_predict --- .../detection/contrib/yolov5/postprocessor.cc | 8 ++++---- .../detection/contrib/yolov5/postprocessor.h | 6 +++--- .../detection/contrib/yolov5/preprocessor.cc | 4 ++-- .../vision/detection/contrib/yolov5/preprocessor.h | 4 ++-- .../vision/detection/contrib/yolov5/yolov5.cc | 5 +++-- .../detection/contrib/yolov5/yolov5_pybind.cc | 14 +++++++------- .../fastdeploy/vision/detection/contrib/yolov5.py | 6 +++--- 7 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index ef6fc48c52d..dd61efb0023 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -27,7 +27,7 @@ YOLOv5Postprocessor::YOLOv5Postprocessor() { } bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector* results, - const std::map>& im_info) { + const std::vector>>& ims_info) { int batch = tensors[0].shape[0]; results->resize(batch); @@ -94,9 +94,9 @@ bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector< utils::NMS(&((*results)[bs]), nms_threshold_); // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), "Cannot find input_shape or output_shape from im_info."); float out_h = iter_out->second[0]; float out_w = iter_out->second[1]; diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index 110a22feed7..9623165f951 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -32,12 +32,12 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { * * \param[in] tensors The inference result from runtime * \param[in] result The output result of detection - * \param[in] im_info record input_shape and output_shape + * \param[in] ims_info The shape info list, record input_shape and output_shape * \return true if the postprocess successed, otherwise false */ bool Run(const std::vector& tensors, - std::vector* results, - const std::map>& im_info); + std::vector* results, + const std::vector>>& ims_info); /// Set conf_threshold, default 0.25 void 
SetConfThreshold(const float& conf_threshold) { diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index d25fca76daf..6517d7c72ad 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -98,7 +98,7 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, } bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* outputs, - std::map>* im_info) { + std::vector>>* ims_info) { if (images->size() == 0) { FDERROR << "The size of input images should be greater than 0." << std::endl; return false; @@ -107,7 +107,7 @@ bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* // Concat all the preprocessed data to a batch tensor std::vector tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { - if (!Preprocess(&(*images)[i], &tensors[i], im_info)) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index bb2eaaa8bb6..b3559685db7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -32,11 +32,11 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { * * \param[in] images The input image data list, all the elements are returned by cv::imread() * \param[in] outputs The output tensors which will feed in runtime - * \param[in] im_info record input_shape and output_shape + * \param[in] ims_info The shape info list, record input_shape and output_shape * \return true if the preprocess successed, otherwise false */ bool Run(std::vector* images, std::vector* outputs, - std::map>* im_info); + std::vector>>* ims_info); /// Set target size, tuple of (width, height), default size = {640, 640} void SetSize(const std::vector& size) { size_ = size; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index b0df6b4e955..affad85f527 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -62,10 +62,11 @@ bool YOLOv5::Predict(const cv::Mat& im, DetectionResult* result) { } bool YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { - std::map> im_info; + std::vector>> ims_info; + ims_info.resize(images.size()); std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &im_info)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { FDERROR << "Failed to preprocess the input image." 
<< std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index d9758e07ebc..4e2f69957e4 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -25,11 +25,11 @@ void BindYOLOv5(pybind11::module& m) { images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); } std::vector outputs; - std::map> im_info; - if (!self.Run(&images, &outputs, &im_info)) { + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { pybind11::eval("raise Exception('Failed to preprocess the input data in PaddleClasPreprocessor.')"); } - return make_pair(outputs, im_info); + return make_pair(outputs, ims_info); }) .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, &vision::detection::YOLOv5Preprocessor::SetSize) .def_property("padding_value", &vision::detection::YOLOv5Preprocessor::GetPaddingValue, &vision::detection::YOLOv5Preprocessor::SetPaddingValue); @@ -38,19 +38,19 @@ void BindYOLOv5(pybind11::module& m) { m, "YOLOv5Postprocessor") .def(pybind11::init<>()) .def("run", [](vision::detection::YOLOv5Postprocessor& self, std::vector& inputs, - const std::map>& im_info) { + const std::vector>>& ims_info) { std::vector results; - if (!self.Run(inputs, &results, im_info)) { + if (!self.Run(inputs, &results, ims_info)) { pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')"); } return results; }) .def("run", [](vision::detection::YOLOv5Postprocessor& self, std::vector& input_array, - const std::map>& im_info) { + const std::vector>>& ims_info) { std::vector results; std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); - if (!self.Run(inputs, &results, im_info)) { + if (!self.Run(inputs, &results, ims_info)) { pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')"); } return results; diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py index a7fb904c952..60ee46eec34 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov5.py +++ b/python/fastdeploy/vision/detection/contrib/yolov5.py @@ -67,14 +67,14 @@ def __init__(self): """ self._postprocessor = C.vision.detection.YOLOv5Postprocessor() - def run(self, runtime_results, im_info): + def run(self, runtime_results, ims_info): """Postprocess the runtime results for YOLOv5 :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime - :param: im_info: (dict)Record input_shape and output_shape + :param: ims_info: (list of dict)Record input_shape and output_shape :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) """ - return self._postprocessor.run(runtime_results, im_info) + return self._postprocessor.run(runtime_results, ims_info) @property def conf_threshold(self): From 4b9cc780795ad027a576f6f5417a266bd460262c Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sun, 13 Nov 2022 08:34:10 +0000 Subject: [PATCH 32/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/yolov5.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index affad85f527..40e343891b6 100755 --- 
a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -77,7 +77,7 @@ bool YOLOv5::BatchPredict(const std::vector& images, std::vector Date: Sun, 13 Nov 2022 08:42:50 +0000 Subject: [PATCH 33/50] add bs_predict for yolov5 --- tests/models/test_yolov5.py | 118 ++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100755 tests/models/test_yolov5.py diff --git a/tests/models/test_yolov5.py b/tests/models/test_yolov5.py new file mode 100755 index 00000000000..5a32fb2289b --- /dev/null +++ b/tests/models/test_yolov5.py @@ -0,0 +1,118 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fastdeploy as fd +import cv2 +import os +import pickle +import numpy as np +import runtime_config as rc + + +def test_detection_yolov5(): + model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx" + input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" + input_url2 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000570688.jpg" + result_url1 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_result1.pkl" + result_url2 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_result2.pkl" + fd.download(model_url, "resources") + fd.download(input_url1, "resources") + fd.download(input_url2, "resources") + fd.download(result_url1, "resources") + fd.download(result_url2, "resources") + + model_file = "resources/yolov5s.onnx" + model = fd.vision.detection.YOLOv5( + model_file, runtime_option=rc.test_option) + + with open("resources/yolov5_result1.pkl", "rb") as f: + expect1 = pickle.load(f) + + with open("resources/yolov5_result2.pkl", "rb") as f: + expect2 = pickle.load(f) + + # compare diff + im1 = cv2.imread("./resources/000000014439.jpg") + im2 = cv2.imread("./resources/000000570688.jpg") + + for i in range(3): + # test single predict + result1 = model.predict(im1) + result2 = model.predict(im2) + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_boxes_2 = np.fabs( + np.array(result2.boxes) - np.array(expect2["boxes"])) + + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_label_2 = np.fabs( + np.array(result2.label_ids) - np.array(expect2["label_ids"])) + + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + diff_scores_2 = np.fabs( + np.array(result2.scores) - np.array(expect2["scores"])) + + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + assert diff_boxes_2.max( + ) < 1e-06, "There's difference in detection boxes 2." + assert diff_label_2.max( + ) < 1e-06, "There's difference in detection label 2." 
+ assert diff_scores_2.max( + ) < 1e-05, "There's difference in detection score 2." + + # test batch predict + results = model.batch_predict([im1, im2]) + result1 = results[0] + result2 = results[1] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_boxes_2 = np.fabs( + np.array(result2.boxes) - np.array(expect2["boxes"])) + + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_label_2 = np.fabs( + np.array(result2.label_ids) - np.array(expect2["label_ids"])) + + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + diff_scores_2 = np.fabs( + np.array(result2.scores) - np.array(expect2["scores"])) + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + assert diff_boxes_2.max( + ) < 1e-06, "There's difference in detection boxes 2." + assert diff_label_2.max( + ) < 1e-06, "There's difference in detection label 2." + assert diff_scores_2.max( + ) < 1e-05, "There's difference in detection score 2." + + +if __name__ == "__main__": + test_detection_yolov5() From 0f3b4a75f0dd6aa5d7565e6d5a42673f9577744b Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 07:21:36 +0000 Subject: [PATCH 34/50] Add runtime test and batch eval --- .../detection/contrib/yolov5/preprocessor.cc | 1 + .../vision/detection/contrib/yolov5/yolov5.cc | 1 - .../fastdeploy/vision/evaluation/detection.py | 51 ++++++++++++++----- tests/models/test_yolov5.py | 26 ++++++++++ 4 files changed, 65 insertions(+), 14 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 7d2ffc0b884..796104c3699 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -103,6 +103,7 @@ bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* FDERROR << "The size of input images should be greater than 0." 
<< std::endl; return false; } + ims_info->resize(images.size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor std::vector tensors(images->size()); diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index 40e343891b6..422cf501c42 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -63,7 +63,6 @@ bool YOLOv5::Predict(const cv::Mat& im, DetectionResult* result) { bool YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { std::vector>> ims_info; - ims_info.resize(images.size()); std::vector fd_images = WrapMat(images); if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { diff --git a/python/fastdeploy/vision/evaluation/detection.py b/python/fastdeploy/vision/evaluation/detection.py index 98c6794feab..d670729ef2a 100644 --- a/python/fastdeploy/vision/evaluation/detection.py +++ b/python/fastdeploy/vision/evaluation/detection.py @@ -23,7 +23,8 @@ def eval_detection(model, ann_file, conf_threshold=None, nms_iou_threshold=None, - plot=False): + plot=False, + batch_size=1): from .utils import CocoDetection from .utils import COCOMetric import cv2 @@ -61,19 +62,43 @@ def eval_detection(model, start_time = time.time() im = cv2.imread(image_info["image"]) im_id = image_info["im_id"] - if conf_threshold is None and nms_iou_threshold is None: - result = model.predict(im.copy()) + if batch_size == 1: + if conf_threshold is None and nms_iou_threshold is None: + result = model.predict(im.copy()) + else: + result = model.predict(im, conf_threshold, nms_iou_threshold) + pred = { + 'bbox': [[c] + [s] + b + for b, s, c in zip(result.boxes, result.scores, + result.label_ids)], + 'bbox_num': len(result.boxes), + 'im_id': im_id + } + eval_metric.update(im_id, pred) else: - result = model.predict(im, conf_threshold, nms_iou_threshold) - pred = { - 'bbox': - [[c] + [s] + b - for b, s, c in zip(result.boxes, result.scores, result.label_ids) - ], - 'bbox_num': len(result.boxes), - 'im_id': im_id - } - eval_metric.update(im_id, pred) + im_list = list() + im_id_list = list() + im_list.append(im) + im_id_list.append(im_id) + if (i + 1) % batch_size != 0: + continue + if conf_threshold is None and nms_iou_threshold is None: + results = model.batch_predict(im_list) + else: + model.postprocessor.conf_threshold = conf_threshold + model.postprocessor.nms_threshold = nms_iou_threshold + results = model.batch_predict(im_list, conf_threshold, + nms_iou_threshold) + for b in range(batch_size): + pred = { + 'bbox': [[c] + [s] + b + for b, s, c in zip(results[b].boxes, results[ + b].scores, results[b].label_ids)], + 'bbox_num': len(results[b].boxes), + 'im_id': im_id_list[b] + } + eval_metric.update(im_id_list[b], pred) + if i == image_num - 1: end_time = time.time() average_inference_time = round( diff --git a/tests/models/test_yolov5.py b/tests/models/test_yolov5.py index 5a32fb2289b..7cc9f082299 100755 --- a/tests/models/test_yolov5.py +++ b/tests/models/test_yolov5.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from fastdeploy import ModelFormat import fastdeploy as fd import cv2 import os @@ -36,6 +37,12 @@ def test_detection_yolov5(): model = fd.vision.detection.YOLOv5( model_file, runtime_option=rc.test_option) + preprocessor = fd.vision.detection.YOLOv5Preprocessor() + postprocessor = fd.vision.detection.YOLOv5Postprocessor() + + rc.test_option.set_model_path(model_file, model_format=ModelFormat.ONNX) + runtime = fd.Runtime(rc.test_option) + with open("resources/yolov5_result1.pkl", "rb") as f: expect1 = pickle.load(f) @@ -47,6 +54,25 @@ def test_detection_yolov5(): im2 = cv2.imread("./resources/000000570688.jpg") for i in range(3): + # test runtime + input_tensors, ims_info = preprocessor.run([im1]) + output_tensors = runtime.infer({"images": input_tensors[0]}) + results = postprocessor.run(output_tensors, ims_info) + result1 = results[0] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." # test single predict result1 = model.predict(im1) result2 = model.predict(im2) From 386b9081fd0a82ca53b4c3a73228808ee252e6ac Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 07:26:59 +0000 Subject: [PATCH 35/50] deal with comments --- fastdeploy/vision/common/processors/convert_and_permute.cc | 4 ++-- fastdeploy/vision/common/processors/convert_and_permute.h | 2 +- fastdeploy/vision/detection/contrib/yolov5/postprocessor.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fastdeploy/vision/common/processors/convert_and_permute.cc b/fastdeploy/vision/common/processors/convert_and_permute.cc index 042e9a13962..73cbb5b48f8 100644 --- a/fastdeploy/vision/common/processors/convert_and_permute.cc +++ b/fastdeploy/vision/common/processors/convert_and_permute.cc @@ -21,9 +21,9 @@ ConvertAndPermute::ConvertAndPermute(const std::vector& alpha, const std::vector& beta, bool swap_rb) { FDASSERT(alpha.size() == beta.size(), - "Convert: requires the size of alpha equal to the size of beta."); + "ConvertAndPermute: requires the size of alpha equal to the size of beta."); FDASSERT(alpha.size() > 0 && beta.size() > 0, - "Convert: requires the size of alpha and beta > 0."); + "ConvertAndPermute: requires the size of alpha and beta > 0."); alpha_.assign(alpha.begin(), alpha.end()); beta_.assign(beta.begin(), beta.end()); swap_rb_ = swap_rb; diff --git a/fastdeploy/vision/common/processors/convert_and_permute.h b/fastdeploy/vision/common/processors/convert_and_permute.h index d4fc5da1213..a36dfd56383 100644 --- a/fastdeploy/vision/common/processors/convert_and_permute.h +++ b/fastdeploy/vision/common/processors/convert_and_permute.h @@ -53,7 +53,7 @@ class FASTDEPLOY_DECL ConvertAndPermute : public Processor { return swap_rb_; } - void SetSwapRB(const bool& swap_rb) { + void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index 9623165f951..a1479dd9403 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -56,7 +56,7 @@ 
class FASTDEPLOY_DECL YOLOv5Postprocessor { float GetNMSThreshold() const { return nms_threshold_; } /// Set multi_label, default true - void SetMultiLabel(const bool& multi_label) { + void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } From 810d7cd78157a5e8676c7f8ce89bfe5adf8bf262 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 07:32:58 +0000 Subject: [PATCH 36/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 796104c3699..112a4d4d5da 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -103,7 +103,7 @@ bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* FDERROR << "The size of input images should be greater than 0." << std::endl; return false; } - ims_info->resize(images.size()); + ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor std::vector tensors(images->size()); From b2002324bd8d681e1b792ab2f6899e2df2e9dd98 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 08:45:41 +0000 Subject: [PATCH 37/50] update testcase --- tests/models/test_yolov5.py | 71 ++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/tests/models/test_yolov5.py b/tests/models/test_yolov5.py index 7cc9f082299..3d79ed33d61 100755 --- a/tests/models/test_yolov5.py +++ b/tests/models/test_yolov5.py @@ -37,12 +37,6 @@ def test_detection_yolov5(): model = fd.vision.detection.YOLOv5( model_file, runtime_option=rc.test_option) - preprocessor = fd.vision.detection.YOLOv5Preprocessor() - postprocessor = fd.vision.detection.YOLOv5Postprocessor() - - rc.test_option.set_model_path(model_file, model_format=ModelFormat.ONNX) - runtime = fd.Runtime(rc.test_option) - with open("resources/yolov5_result1.pkl", "rb") as f: expect1 = pickle.load(f) @@ -54,25 +48,6 @@ def test_detection_yolov5(): im2 = cv2.imread("./resources/000000570688.jpg") for i in range(3): - # test runtime - input_tensors, ims_info = preprocessor.run([im1]) - output_tensors = runtime.infer({"images": input_tensors[0]}) - results = postprocessor.run(output_tensors, ims_info) - result1 = results[0] - - diff_boxes_1 = np.fabs( - np.array(result1.boxes) - np.array(expect1["boxes"])) - diff_label_1 = np.fabs( - np.array(result1.label_ids) - np.array(expect1["label_ids"])) - diff_scores_1 = np.fabs( - np.array(result1.scores) - np.array(expect1["scores"])) - - assert diff_boxes_1.max( - ) < 1e-06, "There's difference in detection boxes 1." - assert diff_label_1.max( - ) < 1e-06, "There's difference in detection label 1." - assert diff_scores_1.max( - ) < 1e-05, "There's difference in detection score 1." # test single predict result1 = model.predict(im1) result2 = model.predict(im2) @@ -140,5 +115,51 @@ def test_detection_yolov5(): ) < 1e-05, "There's difference in detection score 2." 
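Before the runtime-level test below, a usage note for the batched evaluation path that this series adds to `eval_detection` (a hedged sketch, not part of the test suite): images are accumulated into lists of `batch_size` and routed through `model.batch_predict`, and any remainder at the end of the dataset is evaluated as a smaller final batch. The dataset paths are placeholders, and the image-directory argument is assumed from context rather than visible in the hunks above:

```python
# Hedged sketch of batched COCO evaluation; paths are placeholders and the
# image-directory argument ahead of ann_file is an assumption.
import fastdeploy as fd

model = fd.vision.detection.YOLOv5("yolov5s.onnx")
metric = fd.vision.evaluation.eval_detection(
    model,
    "val2017/",                            # image directory (assumed argument)
    "annotations/instances_val2017.json",  # ann_file
    conf_threshold=0.001,
    nms_iou_threshold=0.65,
    batch_size=8)  # batch_size=1 keeps the original single-image path
print(metric)
```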
+def test_detection_yolov5_runtime(): + model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx" + input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" + result_url1 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_result1.pkl" + fd.download(model_url, "resources") + fd.download(input_url1, "resources") + fd.download(result_url1, "resources") + + model_file = "resources/yolov5s.onnx" + + preprocessor = fd.vision.detection.YOLOv5Preprocessor() + postprocessor = fd.vision.detection.YOLOv5Postprocessor() + + rc.test_option.set_model_path(model_file, model_format=ModelFormat.ONNX) + rc.test_option.use_openvino_backend() + runtime = fd.Runtime(rc.test_option) + + with open("resources/yolov5_result1.pkl", "rb") as f: + expect1 = pickle.load(f) + + # compare diff + im1 = cv2.imread("./resources/000000014439.jpg") + + for i in range(3): + # test runtime + input_tensors, ims_info = preprocessor.run([im1.copy()]) + output_tensors = runtime.infer({"images": input_tensors[0]}) + results = postprocessor.run(output_tensors, ims_info) + result1 = results[0] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + + assert diff_boxes_1.max( + ) < 1e-04, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + if __name__ == "__main__": test_detection_yolov5() + test_detection_yolov5_runtime() From 97085a2065db952941eaeb1bdaccc94ac82002f1 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 11:40:49 +0000 Subject: [PATCH 38/50] fixed batch eval bug --- .../vision/detection/contrib/yolov5.py | 6 ++--- .../fastdeploy/vision/evaluation/detection.py | 24 ++++++++++--------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py index 60ee46eec34..42eccb88d4a 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov5.py +++ b/python/fastdeploy/vision/detection/contrib/yolov5.py @@ -140,17 +140,17 @@ def __init__(self, # 通过self.initialized判断整个模型的初始化是否成功 assert self.initialized, "YOLOv5 initialize failed." 
- def predict(self, input_image, conf_threshold=0.25, nms_threshold=0.5): + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): """Detect an input image :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format :param conf_threshold: confidence threshold for postprocessing, default is 0.25 - :param nms_threshold: iou threshold for NMS, default is 0.5 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 :return: DetectionResult """ self.postprocessor.conf_threshold = conf_threshold - self.postprocessor.nms_threshold = nms_threshold + self.postprocessor.nms_threshold = nms_iou_threshold return self._model.predict(input_image) def batch_predict(self, images): diff --git a/python/fastdeploy/vision/evaluation/detection.py b/python/fastdeploy/vision/evaluation/detection.py index d670729ef2a..a13e0429e02 100644 --- a/python/fastdeploy/vision/evaluation/detection.py +++ b/python/fastdeploy/vision/evaluation/detection.py @@ -55,6 +55,8 @@ def eval_detection(model, start_time = 0 end_time = 0 average_inference_time = 0 + im_list = list() + im_id_list = list() for image_info, i in zip(all_image_info, trange( image_num, desc="Inference Progress")): @@ -76,28 +78,28 @@ def eval_detection(model, } eval_metric.update(im_id, pred) else: - im_list = list() - im_id_list = list() im_list.append(im) im_id_list.append(im_id) - if (i + 1) % batch_size != 0: + # If the batch_size is not satisfied, the remaining pictures are formed into a batch + if (i + 1) % batch_size != 0 and i != image_num - 1: continue if conf_threshold is None and nms_iou_threshold is None: results = model.batch_predict(im_list) else: model.postprocessor.conf_threshold = conf_threshold model.postprocessor.nms_threshold = nms_iou_threshold - results = model.batch_predict(im_list, conf_threshold, - nms_iou_threshold) - for b in range(batch_size): + results = model.batch_predict(im_list) + for k in range(len(im_list)): pred = { 'bbox': [[c] + [s] + b - for b, s, c in zip(results[b].boxes, results[ - b].scores, results[b].label_ids)], - 'bbox_num': len(results[b].boxes), - 'im_id': im_id_list[b] + for b, s, c in zip(results[k].boxes, results[ + k].scores, results[k].label_ids)], + 'bbox_num': len(results[k].boxes), + 'im_id': im_id_list[k] } - eval_metric.update(im_id_list[b], pred) + eval_metric.update(im_id_list[k], pred) + im_list.clear() + im_id_list.clear() if i == image_num - 1: end_time = time.time() From ac9b1a7b32d0f57c73e808f9cfa0acc97eeb23a7 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 12:55:39 +0000 Subject: [PATCH 39/50] fixed preprocess bug --- fastdeploy/core/fd_tensor.cc | 8 ++++++++ fastdeploy/core/fd_tensor.h | 8 +------- .../vision/detection/contrib/yolov5/yolov5_pybind.cc | 3 +++ 3 files changed, 12 insertions(+), 7 deletions(-) mode change 100644 => 100755 fastdeploy/core/fd_tensor.cc mode change 100644 => 100755 fastdeploy/core/fd_tensor.h diff --git a/fastdeploy/core/fd_tensor.cc b/fastdeploy/core/fd_tensor.cc old mode 100644 new mode 100755 index a3544756b37..86ce866f4e5 --- a/fastdeploy/core/fd_tensor.cc +++ b/fastdeploy/core/fd_tensor.cc @@ -43,6 +43,14 @@ const void* FDTensor::Data() const { return buffer_; } +void FDTensor::StopSharing() { + if (IsShared()) { + ReallocFn(Nbytes()); + CopyBuffer(buffer_, external_data_ptr, Nbytes()); + external_data_ptr = nullptr; + } +} + const void* FDTensor::CpuData() const { if (device == Device::GPU) { #ifdef WITH_GPU diff --git a/fastdeploy/core/fd_tensor.h 
b/fastdeploy/core/fd_tensor.h
old mode 100644
new mode 100755
index 32a0da86798..7deb4822988
--- a/fastdeploy/core/fd_tensor.h
+++ b/fastdeploy/core/fd_tensor.h
@@ -61,13 +61,7 @@ struct FASTDEPLOY_DECL FDTensor {
     return external_data_ptr != nullptr;
   }

-  void StopSharing() {
-    if (IsShared()) {
-      ReallocFn(Nbytes());
-      CopyBuffer(buffer_, external_data_ptr, Nbytes());
-      external_data_ptr = nullptr;
-    }
-  }
+  void StopSharing();

   const void* Data() const;

diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
index 4e2f69957e4..f44891d9846 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
@@ -29,6 +29,9 @@ void BindYOLOv5(pybind11::module& m) {
         if (!self.Run(&images, &outputs, &ims_info)) {
           pybind11::eval("raise Exception('Failed to preprocess the input data in YOLOv5Preprocessor.')");
         }
+        for (size_t i = 0; i < outputs.size(); ++i) {
+          outputs[i].StopSharing();
+        }
         return make_pair(outputs, ims_info);
       })
       .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, &vision::detection::YOLOv5Preprocessor::SetSize)

From aa06dc6bc576f230758c019872bf26990a7547d2 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Wed, 16 Nov 2022 08:15:17 +0000
Subject: [PATCH 40/50] refactor yolov7

---
 fastdeploy/vision.h                           |   2 +-
 .../detection/contrib/yolov5/postprocessor.cc |   6 +-
 .../detection/contrib/yolov5/postprocessor.h  |   4 +-
 .../detection/contrib/yolov5/preprocessor.cc  |  26 +-
 .../detection/contrib/yolov5/preprocessor.h   |  11 +
 .../detection/contrib/yolov5/yolov5_pybind.cc |   3 +-
 fastdeploy/vision/detection/contrib/yolov7.cc | 344 ------------------
 fastdeploy/vision/detection/contrib/yolov7.h  | 113 ------
 .../detection/contrib/yolov7/postprocessor.cc | 103 ++++++
 .../detection/contrib/yolov7/postprocessor.h  |  66 ++++
 .../detection/contrib/yolov7/preprocessor.cc  | 131 +++++++
 .../detection/contrib/yolov7/preprocessor.h   |  98 +++++
 .../vision/detection/contrib/yolov7/yolov7.cc |  89 +++++
 .../vision/detection/contrib/yolov7/yolov7.h  |  88 +++++
 .../detection/contrib/yolov7/yolov7_pybind.cc |  87 +++++
 .../vision/detection/contrib/yolov7_pybind.cc |  42 ---
 .../fastdeploy/vision/detection/__init__.py   |   2 +-
 .../vision/detection/contrib/yolov5.py        |  19 +-
 .../vision/detection/contrib/yolov7.py        | 205 ++++++-----
 19 files changed, 839 insertions(+), 600 deletions(-)
 delete mode 100755 fastdeploy/vision/detection/contrib/yolov7.cc
 delete mode 100644 fastdeploy/vision/detection/contrib/yolov7.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/postprocessor.cc
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/postprocessor.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/preprocessor.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/yolov7.cc
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/yolov7.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc
 delete mode 100644 fastdeploy/vision/detection/contrib/yolov7_pybind.cc

diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h
index 2f8c7066131..5dc9f4aa3da 100755
--- a/fastdeploy/vision.h
+++ b/fastdeploy/vision.h
@@ -24,7 +24,7 @@
 #include "fastdeploy/vision/detection/contrib/yolov5/yolov5.h"
 #include "fastdeploy/vision/detection/contrib/yolov5lite.h"
 #include "fastdeploy/vision/detection/contrib/yolov6.h"
-#include
"fastdeploy/vision/detection/contrib/yolov7.h" +#include "fastdeploy/vision/detection/contrib/yolov7/yolov7.h" #include "fastdeploy/vision/detection/contrib/yolov7end2end_ort.h" #include "fastdeploy/vision/detection/contrib/yolov7end2end_trt.h" #include "fastdeploy/vision/detection/contrib/yolox.h" diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index dd61efb0023..0366fcce006 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -22,7 +22,7 @@ namespace detection { YOLOv5Postprocessor::YOLOv5Postprocessor() { conf_threshold_ = 0.25; nms_threshold_ = 0.5; - multi_label_ = true; + multi_label_ = false; max_wh_ = 7680.0; } @@ -103,9 +103,9 @@ bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector< float ipt_h = iter_ipt->second[0]; float ipt_w = iter_ipt->second[1]; float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { - float pad_h = (out_h - ipt_h * scale) / 2; - float pad_w = (out_w - ipt_w * scale) / 2; int32_t label_id = ((*results)[bs].label_ids)[i]; // clip box (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh_ * label_id; diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index a1479dd9403..c0ccf58de96 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -55,12 +55,12 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { /// Get nms_threshold, default 0.5 float GetNMSThreshold() const { return nms_threshold_; } - /// Set multi_label, default true + /// Set multi_label, set true for eval, default false void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } - /// Get multi_label, default true + /// Get multi_label, default false bool GetMultiLabel() const { return multi_label_; } protected: diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 112a4d4d5da..0933f7f108a 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -20,6 +20,7 @@ namespace vision { namespace detection { YOLOv5Preprocessor::YOLOv5Preprocessor() { + resize_after_load_ = false; size_ = {640, 640}; padding_value_ = {114.0, 114.0, 114.0}; is_mini_pad_ = false; @@ -50,7 +51,9 @@ void YOLOv5Preprocessor::LetterBox(FDMat* mat) { resize_h = size_[1]; resize_w = size_[0]; } - Resize::Run(mat, resize_w, resize_h); + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } if (pad_h > 0 || pad_w > 0) { float half_h = pad_h * 1.0 / 2; int top = int(round(half_h - 0.1)); @@ -67,18 +70,19 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast(mat->Height()), static_cast(mat->Width())}; - // process after image load - double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), - static_cast(mat->Width())); - if (std::fabs(ratio - 1.0f) > 1e-06) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; + if (resize_after_load_) { + double 
ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()),
+                                        static_cast(mat->Width()));
+    if (std::fabs(ratio - 1.0f) > 1e-06) {
+      int interp = cv::INTER_AREA;
+      if (ratio > 1.0) {
+        interp = cv::INTER_LINEAR;
+      }
+      int resize_h = int(mat->Height() * ratio);
+      int resize_w = int(mat->Width() * ratio);
+      Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
+    }
   }
   // yolov5's preprocess steps
   // 1. letterbox
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
index b3559685db7..41aa25466cb 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
@@ -52,12 +52,23 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor {
   /// Get padding value, size should be the same as channels
   std::vector GetPaddingValue() const { return padding_value_; }

+  /// Set resize_after_load, may have an impact on mAP, default false
+  void SetResizeAfterLoad(bool resize_after_load) {
+    resize_after_load_ = resize_after_load;
+  }
+
+  /// Get resize_after_load, default false
+  bool GetResizeAfterLoad() const { return resize_after_load_; }
+
  protected:
   bool Preprocess(FDMat* mat, FDTensor* output,
                   std::map>* im_info);

   void LetterBox(FDMat* mat);

+  // whether resize after image load, may have an impact on mAP, default false
+  bool resize_after_load_;
+
   // target size, tuple of (width, height), default size = {640, 640}
   std::vector size_;

diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
index f44891d9846..03e223e82fc 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
@@ -35,7 +35,8 @@ void BindYOLOv5(pybind11::module& m) {
         return make_pair(outputs, ims_info);
       })
       .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, &vision::detection::YOLOv5Preprocessor::SetSize)
-      .def_property("padding_value", &vision::detection::YOLOv5Preprocessor::GetPaddingValue, &vision::detection::YOLOv5Preprocessor::SetPaddingValue);
+      .def_property("padding_value", &vision::detection::YOLOv5Preprocessor::GetPaddingValue, &vision::detection::YOLOv5Preprocessor::SetPaddingValue)
+      .def_property("resize_after_load", &vision::detection::YOLOv5Preprocessor::GetResizeAfterLoad, &vision::detection::YOLOv5Preprocessor::SetResizeAfterLoad);

   pybind11::class_(
       m, "YOLOv5Postprocessor")
diff --git a/fastdeploy/vision/detection/contrib/yolov7.cc b/fastdeploy/vision/detection/contrib/yolov7.cc
deleted file mode 100755
index 9185e16ed0e..00000000000
--- a/fastdeploy/vision/detection/contrib/yolov7.cc
+++ /dev/null
@@ -1,344 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
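The `resize_after_load` switch added above is deliberately off by default because the extra early resize can change accuracy slightly. A minimal Python sketch of how a caller might opt in, assuming the `YOLOv5` wrapper exposes its preprocessor as `model.preprocessor` (the same pattern the YOLOv7 wrapper uses later in this patch); the model and image paths are placeholders:

```python
import cv2
import fastdeploy as fd

# Hypothetical local files; substitute your own ONNX export and test image.
model = fd.vision.detection.YOLOv5("yolov5s.onnx")
model.preprocessor.resize_after_load = True  # optional early resize, may affect mAP
im = cv2.imread("000000014439.jpg")
print(model.predict(im))
```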
- -#include "fastdeploy/vision/detection/contrib/yolov7.h" - -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" -#ifdef ENABLE_CUDA_PREPROCESS -#include "fastdeploy/vision/utils/cuda_utils.h" -#endif // ENABLE_CUDA_PREPROCESS - -namespace fastdeploy { -namespace vision { -namespace detection { - -void YOLOv7::LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill, bool scale_up, int stride) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -YOLOv7::YOLOv7(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, - const ModelFormat& model_format) { - if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; -#ifdef ENABLE_CUDA_PREPROCESS - cudaSetDevice(runtime_option.device_id); - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - cuda_stream_ = reinterpret_cast(stream); - runtime_option.SetExternalStream(cuda_stream_); -#endif // ENABLE_CUDA_PREPROCESS - initialized = Initialize(); -} - -bool YOLOv7::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - max_wh = 7680.0; - reused_input_tensors_.resize(1); - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. 
- is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -YOLOv7::~YOLOv7() { -#ifdef ENABLE_CUDA_PREPROCESS - if (use_cuda_preprocessing_) { - CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); - CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); - CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); - CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); - } -#endif // ENABLE_CUDA_PREPROCESS -} - -bool YOLOv7::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (std::fabs(ratio - 1.0f) > 1e-06) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov7's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - YOLOv7::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -void YOLOv7::UseCudaPreprocessing(int max_image_size) { -#ifdef ENABLE_CUDA_PREPROCESS - use_cuda_preprocessing_ = true; - is_scale_up = true; - if (input_img_cuda_buffer_host_ == nullptr) { - // prepare input data cache in GPU pinned memory - CUDA_CHECK(cudaMallocHost((void**)&input_img_cuda_buffer_host_, - max_image_size * 3)); - // prepare input data cache in GPU device memory - CUDA_CHECK( - cudaMalloc((void**)&input_img_cuda_buffer_device_, max_image_size * 3)); - CUDA_CHECK(cudaMalloc((void**)&input_tensor_cuda_buffer_device_, - 3 * size[0] * size[1] * sizeof(float))); - } -#else - FDWARNING << "The FastDeploy didn't compile with BUILD_CUDA_SRC=ON." - << std::endl; - use_cuda_preprocessing_ = false; -#endif -} - -bool YOLOv7::CudaPreprocess( - Mat* mat, FDTensor* output, - std::map>* im_info) { -#ifdef ENABLE_CUDA_PREPROCESS - if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { - FDERROR << "Preprocessing with CUDA is only available when the arguments " - "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." 
- << std::endl; - return false; - } - - // Record the shape of image and the shape of preprocessed image - (*im_info)["input_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - cudaStream_t stream = reinterpret_cast(cuda_stream_); - int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); - memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); - CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, - input_img_cuda_buffer_host_, src_img_buf_size, - cudaMemcpyHostToDevice, stream)); - utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), - mat->Height(), input_tensor_cuda_buffer_device_, - size[0], size[1], padding_value, stream); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(size[0]), - static_cast(size[1])}; - - output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, - input_tensor_cuda_buffer_device_); - output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -#else - FDERROR << "CUDA src code was not enabled." << std::endl; - return false; -#endif // ENABLE_CUDA_PREPROCESS -} - -bool YOLOv7::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.0f; - float pad_w = (out_w - ipt_w * scale) / 2.0f; - if (is_mini_pad) { - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh 
* label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOv7::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { - Mat mat(*im); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - } else { - if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - } - - reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; - if (!Infer()) { - FDERROR << "Failed to inference." << std::endl; - return false; - } - - if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov7.h b/fastdeploy/vision/detection/contrib/yolov7.h deleted file mode 100644 index b9d637ed9ab..00000000000 --- a/fastdeploy/vision/detection/contrib/yolov7.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { -/*! @brief YOLOv7 model object used when to load a YOLOv7 model exported by YOLOv7. - */ -class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel { - public: - /** \brief Set path of model file and the configuration of runtime. 
- * - * \param[in] model_file Path of model file, e.g ./yolov7.onnx - * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored - * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends" - * \param[in] model_format Model format of the loaded model, default is ONNX format - */ - YOLOv7(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::ONNX); - - ~YOLOv7(); - - virtual std::string ModelName() const { return "yolov7"; } - /** \brief Predict the detection result for an input image - * - * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format - * \param[in] result The output detection result will be writen to this structure - * \param[in] conf_threshold confidence threashold for postprocessing, default is 0.25 - * \param[in] nms_iou_threshold iou threashold for NMS, default is 0.5 - * \return true if the prediction successed, otherwise false - */ - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - - void UseCudaPreprocessing(int max_img_size = 3840 * 2160); - - /*! @brief - Argument for image preprocessing step, tuple of (width, height), decide the target size after resize, default size = {640, 640} - */ - std::vector size; - // padding value, size should be the same as channels - - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, - // will resize the image to the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, - // the maximum resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS - float max_wh; - - private: - bool Initialize(); - - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - bool CudaPreprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - void LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. 
- bool is_dynamic_input_; - // CUDA host buffer for input image - uint8_t* input_img_cuda_buffer_host_ = nullptr; - // CUDA device buffer for input image - uint8_t* input_img_cuda_buffer_device_ = nullptr; - // CUDA device buffer for TRT input tensor - float* input_tensor_cuda_buffer_device_ = nullptr; - // Whether to use CUDA preprocessing - bool use_cuda_preprocessing_ = false; - // CUDA stream - void* cuda_stream_ = nullptr; -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov7/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/postprocessor.cc new file mode 100755 index 00000000000..01d657adb30 --- /dev/null +++ b/fastdeploy/vision/detection/contrib/yolov7/postprocessor.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/detection/contrib/yolov7/postprocessor.h" +#include "fastdeploy/vision/utils/utils.h" + +namespace fastdeploy { +namespace vision { +namespace detection { + +YOLOv7Postprocessor::YOLOv7Postprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + max_wh_ = 7680.0; +} + +bool YOLOv7Postprocessor::Run(const std::vector& tensors, std::vector* results, + const std::vector>>& ims_info) { + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + (*results)[bs].Reserve(tensors[0].shape[1]); + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float* data = reinterpret_cast(tensors[0].Data()) + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; + float confidence = data[s + 4]; + const float* max_class_score = + std::max_element(data + s + 5, data + s + tensors[0].shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w - 1.0f); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h - 1.0f); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov7/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov7/postprocessor.h new file mode 100755 index 00000000000..5ece87eb8b7 --- /dev/null +++ b/fastdeploy/vision/detection/contrib/yolov7/postprocessor.h @@ -0,0 +1,66 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace detection {
+/*! @brief Postprocessor object for YOLOv7 series models.
+ */
+class FASTDEPLOY_DECL YOLOv7Postprocessor {
+ public:
+  /** \brief Create a postprocessor instance for YOLOv7 series models
+   */
+  YOLOv7Postprocessor();
+
+  /** \brief Process the result of runtime and fill to DetectionResult structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] results The output detection result list
+   * \param[in] ims_info The shape info list, record input_shape and output_shape
+   * \return true if the postprocess succeeded, otherwise false
+   */
+  bool Run(const std::vector& tensors,
+           std::vector* results,
+           const std::vector>>& ims_info);
+
+  /// Set conf_threshold, default 0.25
+  void SetConfThreshold(const float& conf_threshold) {
+    conf_threshold_ = conf_threshold;
+  }
+
+  /// Get conf_threshold, default 0.25
+  float GetConfThreshold() const { return conf_threshold_; }
+
+  /// Set nms_threshold, default 0.5
+  void SetNMSThreshold(const float& nms_threshold) {
+    nms_threshold_ = nms_threshold;
+  }
+
+  /// Get nms_threshold, default 0.5
+  float GetNMSThreshold() const { return nms_threshold_; }
+
+ protected:
+  float conf_threshold_;
+  float nms_threshold_;
+  float max_wh_;
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
new file mode 100755
index 00000000000..26d831b0a38
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
@@ -0,0 +1,131 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
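Because the thresholds now live on the postprocessor rather than in each `Predict` call, they can be set once and reused across predictions. A hedged Python sketch of that pattern, using the `conf_threshold` and `nms_threshold` properties bound later in this patch:

```python
import fastdeploy as fd

# Standalone postprocessor; the defaults mirror the C++ side (0.25 / 0.5).
postprocessor = fd.vision.detection.YOLOv7Postprocessor()
postprocessor.conf_threshold = 0.3   # drop low-confidence candidates earlier
postprocessor.nms_threshold = 0.45   # stricter IoU suppression
# results = postprocessor.run(output_tensors, ims_info)  # fed by a runtime
```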
+ +#include "fastdeploy/vision/detection/contrib/yolov7/preprocessor.h" +#include "fastdeploy/function/concat.h" + +namespace fastdeploy { +namespace vision { +namespace detection { + +YOLOv7Preprocessor::YOLOv7Preprocessor() { + resize_after_load_ = false; + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = false; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv7Preprocessor::LetterBox(FDMat* mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output, + std::map>* im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // process after image load + if (resize_after_load_) { + double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), + static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + } + // yolov7's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, h, w, c + return true; +} + +bool YOLOv7Preprocessor::Run(std::vector* images, std::vector* outputs, + std::vector>>* ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." 
        << std::endl;
+      return false;
+    }
+  }
+
+  if (tensors.size() == 1) {
+    (*outputs)[0] = std::move(tensors[0]);
+  } else {
+    function::Concat(tensors, &((*outputs)[0]), 0);
+  }
+  return true;
+}
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h
new file mode 100755
index 00000000000..ddcb786e599
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace detection {
+/*! @brief Preprocessor object for YOLOv7 series models.
+ */
+class FASTDEPLOY_DECL YOLOv7Preprocessor {
+ public:
+  /** \brief Create a preprocessor instance for YOLOv7 series models
+   */
+  YOLOv7Preprocessor();
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] images The input image data list, all the elements are returned by cv::imread()
+   * \param[in] outputs The output tensors which will feed in runtime
+   * \param[in] ims_info The shape info list, record input_shape and output_shape
+   * \return true if the preprocess succeeded, otherwise false
+   */
+  bool Run(std::vector* images, std::vector* outputs,
+           std::vector>>* ims_info);
+
+  /// Set target size, tuple of (width, height), default size = {640, 640}
+  void SetSize(const std::vector& size) { size_ = size; }
+
+  /// Get target size, tuple of (width, height), default size = {640, 640}
+  std::vector GetSize() const { return size_; }
+
+  /// Set padding value, size should be the same as channels
+  void SetPaddingValue(const std::vector& padding_value) {
+    padding_value_ = padding_value;
+  }
+
+  /// Get padding value, size should be the same as channels
+  std::vector GetPaddingValue() const { return padding_value_; }
+
+  /// Set resize_after_load, may have an impact on mAP, default false
+  void SetResizeAfterLoad(bool resize_after_load) {
+    resize_after_load_ = resize_after_load;
+  }
+
+  /// Get resize_after_load, default false
+  bool GetResizeAfterLoad() const { return resize_after_load_; }
+
+ protected:
+  bool Preprocess(FDMat* mat, FDTensor* output,
+                  std::map>* im_info);
+
+  void LetterBox(FDMat* mat);
+
+  // whether resize after image load, may have an impact on mAP, default false
+  bool resize_after_load_;
+
+  // target size, tuple of (width, height), default size = {640, 640}
+  std::vector size_;
+
+  // padding value, size should be the same as channels
+  std::vector padding_value_;
+
+  // only pad to the minimum rectangle whose height and width are multiples of stride
+  bool is_mini_pad_;
+
+  // while is_mini_pad = false and is_no_pad = true,
+  // will resize the image to the set size
+  bool is_no_pad_;
+
+  // if is_scale_up is false, the input image can only be zoomed out,
+  // the maximum resize scale cannot exceed 1.0
+  bool is_scale_up_;
+
+  // padding stride, for is_mini_pad
+  int stride_;
+
+  // for offsetting the boxes by classes when using NMS
+  float max_wh_;
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/yolov7.cc b/fastdeploy/vision/detection/contrib/yolov7/yolov7.cc
new file mode 100755
index 00000000000..513351a095e
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7.cc
@@ -0,0 +1,89 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/detection/contrib/yolov7/yolov7.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace detection {
+
+YOLOv7::YOLOv7(const std::string& model_file, const std::string& params_file,
+               const RuntimeOption& custom_option,
+               const ModelFormat& model_format) {
+  if (model_format == ModelFormat::ONNX) {
+    valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
+    valid_gpu_backends = {Backend::ORT, Backend::TRT};
+  } else {
+    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
+    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
+  }
+  runtime_option = custom_option;
+  runtime_option.model_format = model_format;
+  runtime_option.model_file = model_file;
+  runtime_option.params_file = params_file;
+  initialized = Initialize();
+}
+
+bool YOLOv7::Initialize() {
+  if (!InitRuntime()) {
+    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool YOLOv7::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, float nms_threshold) {
+  postprocessor_.SetConfThreshold(conf_threshold);
+  postprocessor_.SetNMSThreshold(nms_threshold);
+  if (!Predict(*im, result)) {
+    return false;
+  }
+  return true;
+}
+
+bool YOLOv7::Predict(const cv::Mat& im, DetectionResult* result) {
+  std::vector results;
+  if (!BatchPredict({im}, &results)) {
+    return false;
+  }
+  *result = std::move(results[0]);
+  return true;
+}
+
+bool YOLOv7::BatchPredict(const std::vector& images, std::vector* results) {
+  std::vector>> ims_info;
+  std::vector fd_images = WrapMat(images);
+
+  if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) {
+    FDERROR << "Failed to preprocess the input image." << std::endl;
+    return false;
+  }
+
+  reused_input_tensors_[0].name = InputInfoOfRuntime(0).name;
+  if (!Infer(reused_input_tensors_, &reused_output_tensors_)) {
+    FDERROR << "Failed to inference by runtime." << std::endl;
+    return false;
+  }
+
+  if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) {
+    FDERROR << "Failed to postprocess the inference results by runtime."
            << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/yolov7.h b/fastdeploy/vision/detection/contrib/yolov7/yolov7.h
new file mode 100755
index 00000000000..2c36fd0c809
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7.h
@@ -0,0 +1,88 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/vision/detection/contrib/yolov7/preprocessor.h"
+#include "fastdeploy/vision/detection/contrib/yolov7/postprocessor.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace detection {
+/*! @brief YOLOv7 model object used to load a YOLOv7 model exported by YOLOv7.
+ */
+class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel {
+ public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g ./yolov7.onnx
+   * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
+  YOLOv7(const std::string& model_file, const std::string& params_file = "",
+         const RuntimeOption& custom_option = RuntimeOption(),
+         const ModelFormat& model_format = ModelFormat::ONNX);
+
+  std::string ModelName() const { return "yolov7"; }
+
+  /** \brief DEPRECATED Predict the detection result for an input image, remove at 1.0 version
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_threshold iou threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(cv::Mat* im, DetectionResult* result,
+                       float conf_threshold = 0.25,
+                       float nms_threshold = 0.5);
+
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(const cv::Mat& img, DetectionResult* result);
+
+  /** \brief Predict the detection results for a batch of input images
+   *
+   * \param[in] imgs The input image list, each element comes from cv::imread()
+   * \param[in] results The output detection result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector& imgs,
+                            std::vector* results);
+
+  /// Get preprocessor reference of YOLOv7
+  virtual YOLOv7Preprocessor& GetPreprocessor() {
+    return preprocessor_;
+  }
+
+  /// Get postprocessor reference of YOLOv7
+  virtual YOLOv7Postprocessor& GetPostprocessor() {
+    return postprocessor_;
+  }
+
+ protected:
+  bool Initialize();
+  YOLOv7Preprocessor preprocessor_;
+  YOLOv7Postprocessor postprocessor_;
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc b/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc
new file mode 100755
index 00000000000..9e3dad22ce7
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc
@@ -0,0 +1,87 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+void BindYOLOv7(pybind11::module& m) {
+  pybind11::class_(
+      m, "YOLOv7Preprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::detection::YOLOv7Preprocessor& self, std::vector& im_list) {
+        std::vector images;
+        for (size_t i = 0; i < im_list.size(); ++i) {
+          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+        }
+        std::vector outputs;
+        std::vector>> ims_info;
+        if (!self.Run(&images, &outputs, &ims_info)) {
+          pybind11::eval("raise Exception('Failed to preprocess the input data in YOLOv7Preprocessor.')");
+        }
+        for (size_t i = 0; i < outputs.size(); ++i) {
+          outputs[i].StopSharing();
+        }
+        return make_pair(outputs, ims_info);
+      })
+      .def_property("size", &vision::detection::YOLOv7Preprocessor::GetSize, &vision::detection::YOLOv7Preprocessor::SetSize)
+      .def_property("padding_value", &vision::detection::YOLOv7Preprocessor::GetPaddingValue, &vision::detection::YOLOv7Preprocessor::SetPaddingValue)
+      .def_property("resize_after_load", &vision::detection::YOLOv7Preprocessor::GetResizeAfterLoad, &vision::detection::YOLOv7Preprocessor::SetResizeAfterLoad);
+
+  pybind11::class_(
+      m, "YOLOv7Postprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::detection::YOLOv7Postprocessor& self, std::vector& inputs,
+                     const std::vector>>& ims_info) {
+        std::vector results;
+        if (!self.Run(inputs, &results, ims_info)) {
+          pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv7Postprocessor.')");
+        }
+        return results;
+      })
+      .def("run", [](vision::detection::YOLOv7Postprocessor& self, std::vector& input_array,
+                     const std::vector>>& ims_info) {
+        std::vector results;
+        std::vector inputs;
+        PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
+        if (!self.Run(inputs, &results, ims_info)) {
+          pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv7Postprocessor.')");
+        }
+        return results;
+      })
+      .def_property("conf_threshold", &vision::detection::YOLOv7Postprocessor::GetConfThreshold, &vision::detection::YOLOv7Postprocessor::SetConfThreshold)
+
.def_property("nms_threshold", &vision::detection::YOLOv7Postprocessor::GetNMSThreshold, &vision::detection::YOLOv7Postprocessor::SetNMSThreshold); + + pybind11::class_(m, "YOLOv7") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv7& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", [](vision::detection::YOLOv7& self, std::vector& data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", &vision::detection::YOLOv7::GetPreprocessor) + .def_property_readonly("postprocessor", &vision::detection::YOLOv7::GetPostprocessor); +} +} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov7_pybind.cc b/fastdeploy/vision/detection/contrib/yolov7_pybind.cc deleted file mode 100644 index d7ab993401d..00000000000 --- a/fastdeploy/vision/detection/contrib/yolov7_pybind.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv7(pybind11::module& m) { - pybind11::class_(m, "YOLOv7") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOv7& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def("use_cuda_preprocessing", - [](vision::detection::YOLOv7& self, int max_image_size) { - self.UseCudaPreprocessing(max_image_size); - }) - .def_readwrite("size", &vision::detection::YOLOv7::size) - .def_readwrite("padding_value", &vision::detection::YOLOv7::padding_value) - .def_readwrite("is_mini_pad", &vision::detection::YOLOv7::is_mini_pad) - .def_readwrite("is_no_pad", &vision::detection::YOLOv7::is_no_pad) - .def_readwrite("is_scale_up", &vision::detection::YOLOv7::is_scale_up) - .def_readwrite("stride", &vision::detection::YOLOv7::stride) - .def_readwrite("max_wh", &vision::detection::YOLOv7::max_wh); -} -} // namespace fastdeploy diff --git a/python/fastdeploy/vision/detection/__init__.py b/python/fastdeploy/vision/detection/__init__.py index 6de4a3fa634..b5f01f3a77e 100755 --- a/python/fastdeploy/vision/detection/__init__.py +++ b/python/fastdeploy/vision/detection/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. 
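With the preprocessor and postprocessor bound separately, the YOLOv7 pipeline can also be driven around a bare `fd.Runtime`, mirroring the YOLOv5 runtime test earlier in this series. A sketch under assumed inputs: the file name `yolov7.onnx`, the test image, and the input tensor name `images` all depend on the particular export:

```python
import cv2
import fastdeploy as fd
from fastdeploy import ModelFormat

# Build a bare runtime around a hypothetical YOLOv7 ONNX export.
option = fd.RuntimeOption()
option.set_model_path("yolov7.onnx", model_format=ModelFormat.ONNX)
runtime = fd.Runtime(option)

preprocessor = fd.vision.detection.YOLOv7Preprocessor()
postprocessor = fd.vision.detection.YOLOv7Postprocessor()

im = cv2.imread("test.jpg")
input_tensors, ims_info = preprocessor.run([im])
output_tensors = runtime.infer({"images": input_tensors[0]})  # input name is model-specific
results = postprocessor.run(output_tensors, ims_info)
print(results[0])
```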
from __future__ import absolute_import
-from .contrib.yolov7 import YOLOv7
+from .contrib.yolov7 import *
 from .contrib.yolor import YOLOR
 from .contrib.scaled_yolov4 import ScaledYOLOv4
 from .contrib.nanodet_plus import NanoDetPlus
diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py
index 42eccb88d4a..e8895083731 100644
--- a/python/fastdeploy/vision/detection/contrib/yolov5.py
+++ b/python/fastdeploy/vision/detection/contrib/yolov5.py
@@ -41,9 +41,19 @@ def size(self):

     @property
     def padding_value(self):
+        """
+        padding value for preprocessing, default [114.0, 114.0, 114.0]
+        """
         # padding value, size should be the same as channels
         return self._preprocessor.padding_value

+    @property
+    def resize_after_load(self):
+        """
+        resize_after_load for preprocessing, may have an impact on mAP, default false
+        """
+        return self._preprocessor.resize_after_load
+
     @size.setter
     def size(self, wh):
         assert isinstance(wh, (list, tuple)),\
@@ -60,6 +70,13 @@ def padding_value(self, value):
             list), "The value to set `padding_value` must be type of list."
         self._preprocessor.padding_value = value

+    @resize_after_load.setter
+    def resize_after_load(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `resize_after_load` must be type of bool."
+        self._preprocessor.resize_after_load = value
+

 class YOLOv5Postprocessor:
     def __init__(self):
@@ -93,7 +110,7 @@ def nms_threshold(self):
     @property
     def multi_label(self):
         """
-        multi_label for postprocessing, default is true
+        multi_label for postprocessing, set true for eval, default is false
         """
         return self._postprocessor.multi_label

diff --git a/python/fastdeploy/vision/detection/contrib/yolov7.py b/python/fastdeploy/vision/detection/contrib/yolov7.py
index 0334504851b..8b7ef44305a 100644
--- a/python/fastdeploy/vision/detection/contrib/yolov7.py
+++ b/python/fastdeploy/vision/detection/contrib/yolov7.py
@@ -18,6 +18,108 @@
 from .... import c_lib_wrap as C


+class YOLOv7Preprocessor:
+    def __init__(self):
+        """Create a preprocessor for YOLOv7
+        """
+        self._preprocessor = C.vision.detection.YOLOv7Preprocessor()
+
+    def run(self, input_ims):
+        """Preprocess input images for YOLOv7
+
+        :param: input_ims: (list of numpy.ndarray)The input images
+        :return: list of FDTensor
+        """
+        return self._preprocessor.run(input_ims)
+
+    @property
+    def size(self):
+        """
+        Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640]
+        """
+        return self._preprocessor.size
+
+    @property
+    def padding_value(self):
+        """
+        padding value for preprocessing, default [114.0, 114.0, 114.0]
+        """
+        # padding value, size should be the same as channels
+        return self._preprocessor.padding_value
+
+    @property
+    def resize_after_load(self):
+        """
+        resize_after_load for preprocessing, may have an impact on mAP, default false
+        """
+        return self._preprocessor.resize_after_load
+
+    @size.setter
+    def size(self, wh):
+        assert isinstance(wh, (list, tuple)),\
+            "The value to set `size` must be type of tuple or list."
+        assert len(wh) == 2,\
+            "The value to set `size` must contain 2 elements meaning [width, height], but now it contains {} elements.".format(
+            len(wh))
+        self._preprocessor.size = wh
+
+    @padding_value.setter
+    def padding_value(self, value):
+        assert isinstance(
+            value,
+            list), "The value to set `padding_value` must be type of list."
+ self._preprocessor.padding_value = value + + @resize_after_load.setter + def resize_after_load(self, value): + assert isinstance( + value, + bool), "The value to set `resize_after_load` must be type of bool." + self._preprocessor.resize_after_load = value + + +class YOLOv7Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv7 + """ + self._postprocessor = C.vision.detection.YOLOv7Postprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv7 + + :param: runtime_results: (list of FDTensor) The output FDTensor results from runtime + :param: ims_info: (list of dict) Record input_shape and output_shape + :return: list of DetectionResult (if the runtime_results are predicted from batched samples, the length of this list equals the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance(conf_threshold, float),\ + "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance(nms_threshold, float),\ + "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + class YOLOv7(FastDeployModel): def __init__(self, model_file, @@ -35,6 +137,7 @@ def __init__(self, # The initialized runtime option is stored in self._runtime_option super(YOLOv7, self).__init__(runtime_option) + assert model_format == ModelFormat.ONNX, "YOLOv7 only supports model format of ModelFormat.ONNX now."
self._model = C.vision.detection.YOLOv7( model_file, params_file, self._runtime_option, model_format) # Check whether the whole model initialized successfully via self.initialized @@ -44,96 +147,36 @@ def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): """Detect an input image :param input_image: (numpy.ndarray) The input image data, 3-D array with layout HWC, BGR format - :param conf_threshold: confidence threashold for postprocessing, default is 0.25 - :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 :return: DetectionResult """ - return self._model.predict(input_image, conf_threshold, - nms_iou_threshold) - # Property wrappers related to the YOLOv7 model - # Most are preprocessing related; e.g. setting model.size = [1280, 1280] changes the resize size used in preprocessing (provided the model supports it) - @property - def size(self): - """ - Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] - """ - return self._model.size + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) - @property - def padding_value(self): - # padding value, size should be the same as channels - return self._model.padding_value + def batch_predict(self, images): + """Detect a batch of input images - @property - def is_no_pad(self): - # when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size - return self._model.is_no_pad + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ - @property - def is_mini_pad(self): - # only pad to the minimum rectangle whose height and width are multiples of stride - return self._model.is_mini_pad + return self._model.batch_predict(images) @property - def is_scale_up(self): - # if is_scale_up is false, the input image can only be zoomed out; the maximum resize scale cannot exceed 1.0 - return self._model.is_scale_up + def preprocessor(self): + """Get YOLOv7Preprocessor object of the loaded model - @property - def stride(self): - # padding stride, for is_mini_pad - return self._model.stride + :return YOLOv7Preprocessor + """ + return self._model.preprocessor @property - def max_wh(self): - # for offsetting the boxes by classes when using NMS - return self._model.max_wh + def postprocessor(self): + """Get YOLOv7Postprocessor object of the loaded model - @size.setter - def size(self, wh): - assert isinstance(wh, (list, tuple)),\ - "The value to set `size` must be type of tuple or list." - assert len(wh) == 2,\ - "The value to set `size` must contain 2 elements meaning [width, height], but now it contains {} elements.".format( - len(wh)) - self._model.size = wh - - @padding_value.setter - def padding_value(self, value): - assert isinstance( - value, - list), "The value to set `padding_value` must be type of list." - self._model.padding_value = value - - @is_no_pad.setter - def is_no_pad(self, value): - assert isinstance( - value, bool), "The value to set `is_no_pad` must be type of bool." - self._model.is_no_pad = value - - @is_mini_pad.setter - def is_mini_pad(self, value): - assert isinstance( - value, - bool), "The value to set `is_mini_pad` must be type of bool." - self._model.is_mini_pad = value - - @is_scale_up.setter - def is_scale_up(self, value): - assert isinstance( - value, - bool), "The value to set `is_scale_up` must be type of bool."
- self._model.is_scale_up = value - - @stride.setter - def stride(self, value): - assert isinstance( - value, int), "The value to set `stride` must be type of int." - self._model.stride = value - - @max_wh.setter - def max_wh(self, value): - assert isinstance( - value, float), "The value to set `max_wh` must be type of float." - self._model.max_wh = value + :return YOLOv7Postprocessor + """ + return self._model.postprocessor From 9c5f76631aca9af159476633bd9810dd8d09038d Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 16 Nov 2022 09:08:25 +0000 Subject: [PATCH 41/50] add yolov7 testcase --- tests/models/test_yolov7.py | 165 ++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100755 tests/models/test_yolov7.py diff --git a/tests/models/test_yolov7.py b/tests/models/test_yolov7.py new file mode 100755 index 00000000000..ba08fbaf5bc --- /dev/null +++ b/tests/models/test_yolov7.py @@ -0,0 +1,165 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from fastdeploy import ModelFormat +import fastdeploy as fd +import cv2 +import os +import pickle +import numpy as np +import runtime_config as rc + + +def test_detection_yolov7(): + model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx" + input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" + input_url2 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000570688.jpg" + result_url1 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_result1.pkl" + result_url2 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_result2.pkl" + fd.download(model_url, "resources") + fd.download(input_url1, "resources") + fd.download(input_url2, "resources") + fd.download(result_url1, "resources") + fd.download(result_url2, "resources") + + model_file = "resources/yolov7.onnx" + model = fd.vision.detection.YOLOv7( + model_file, runtime_option=rc.test_option) + + with open("resources/yolov7_result1.pkl", "rb") as f: + expect1 = pickle.load(f) + + with open("resources/yolov7_result2.pkl", "rb") as f: + expect2 = pickle.load(f) + + # compare diff + im1 = cv2.imread("./resources/000000014439.jpg") + im2 = cv2.imread("./resources/000000570688.jpg") + + for i in range(3): + # test single predict + result1 = model.predict(im1) + result2 = model.predict(im2) + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_boxes_2 = np.fabs( + np.array(result2.boxes) - np.array(expect2["boxes"])) + + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_label_2 = np.fabs( + np.array(result2.label_ids) - np.array(expect2["label_ids"])) + + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + diff_scores_2 = np.fabs( + np.array(result2.scores) - np.array(expect2["scores"])) + + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." 
+ assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + assert diff_boxes_2.max( + ) < 1e-06, "There's difference in detection boxes 2." + assert diff_label_2.max( + ) < 1e-06, "There's difference in detection label 2." + assert diff_scores_2.max( + ) < 1e-05, "There's difference in detection score 2." + + # test batch predict + results = model.batch_predict([im1, im2]) + result1 = results[0] + result2 = results[1] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_boxes_2 = np.fabs( + np.array(result2.boxes) - np.array(expect2["boxes"])) + + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_label_2 = np.fabs( + np.array(result2.label_ids) - np.array(expect2["label_ids"])) + + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + diff_scores_2 = np.fabs( + np.array(result2.scores) - np.array(expect2["scores"])) + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + assert diff_boxes_2.max( + ) < 1e-06, "There's difference in detection boxes 2." + assert diff_label_2.max( + ) < 1e-06, "There's difference in detection label 2." + assert diff_scores_2.max( + ) < 1e-05, "There's difference in detection score 2." + + +def test_detection_yolov7_runtime(): + model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx" + input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" + result_url1 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_result1.pkl" + fd.download(model_url, "resources") + fd.download(input_url1, "resources") + fd.download(result_url1, "resources") + + model_file = "resources/yolov7.onnx" + + preprocessor = fd.vision.detection.YOLOv7Preprocessor() + postprocessor = fd.vision.detection.YOLOv7Postprocessor() + + rc.test_option.set_model_path(model_file, model_format=ModelFormat.ONNX) + rc.test_option.use_openvino_backend() + runtime = fd.Runtime(rc.test_option) + + with open("resources/yolov7_result1.pkl", "rb") as f: + expect1 = pickle.load(f) + + # compare diff + im1 = cv2.imread("./resources/000000014439.jpg") + + for i in range(3): + # test runtime + input_tensors, ims_info = preprocessor.run([im1.copy()]) + output_tensors = runtime.infer({"images": input_tensors[0]}) + results = postprocessor.run(output_tensors, ims_info) + result1 = results[0] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + + assert diff_boxes_1.max( + ) < 1e-04, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." 
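
(Aside: the test above drives the decoupled preprocessor/runtime/postprocessor path; in everyday use the same knobs are reachable from the model object. A sketch with placeholder paths:)

```python
import cv2
import fastdeploy as fd

model = fd.vision.detection.YOLOv7("yolov7.onnx")  # placeholder path

# Thresholds now live on the postprocessor; predict(im, conf, nms)
# simply forwards into these properties.
model.postprocessor.conf_threshold = 0.3
model.postprocessor.nms_threshold = 0.45

# Preprocessing knobs are exposed the same way.
model.preprocessor.size = [640, 640]
model.preprocessor.padding_value = [114.0, 114.0, 114.0]

print(model.predict(cv2.imread("test.jpg")))
```
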
+ + if __name__ == "__main__": + test_detection_yolov7() + test_detection_yolov7_runtime() From e39d60a111f12a1a434e7d9a01688a1c5c50e475 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 17 Nov 2022 03:49:51 +0000 Subject: [PATCH 42/50] rm resize_after_load and add is_scale_up --- .../detection/contrib/yolov5/preprocessor.cc | 17 +---------------- .../detection/contrib/yolov5/preprocessor.h | 14 ++++++-------- .../detection/contrib/yolov5/yolov5_pybind.cc | 2 +- .../detection/contrib/yolov7/preprocessor.cc | 17 +---------------- .../detection/contrib/yolov7/preprocessor.h | 14 ++++++-------- .../detection/contrib/yolov7/yolov7_pybind.cc | 2 +- .../vision/detection/contrib/yolov5.py | 14 +++++++------- .../vision/detection/contrib/yolov7.py | 14 +++++++------- 8 files changed, 30 insertions(+), 64 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 0933f7f108a..6b1b6f82193 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -20,12 +20,11 @@ namespace vision { namespace detection { YOLOv5Preprocessor::YOLOv5Preprocessor() { - resize_after_load_ = false; size_ = {640, 640}; padding_value_ = {114.0, 114.0, 114.0}; is_mini_pad_ = false; is_no_pad_ = false; - is_scale_up_ = false; + is_scale_up_ = true; stride_ = 32; max_wh_ = 7680.0; } @@ -70,20 +69,6 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; - // process after image load - if (resize_after_load_) { - double ratio = (size_[0] * 1.0) / std::max(static_cast<double>(mat->Height()), - static_cast<double>(mat->Width())); - if (std::fabs(ratio - 1.0f) > 1e-06) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - } // yolov5's preprocess steps // 1. letterbox // 2. convert_and_permute(swap_rb=true) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index 41aa25466cb..f0cf438df0c 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -52,13 +52,14 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { /// Get padding value, size should be the same as channels std::vector<float> GetPaddingValue() const { return padding_value_; } - /// Set resize_after_load, may have an impact on map, default false - void SetResizeAfterLoad(bool resize_after_load) { - resize_after_load_ = resize_after_load; + /// Set is_scale_up, if is_scale_up is false, the input image can only + /// be zoomed out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { + is_scale_up_ = is_scale_up; } - /// Get resize_after_load, default false - bool GetResizeAfterLoad() const { return resize_after_load_; } + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } protected: bool Preprocess(FDMat* mat, FDTensor* output, @@ -66,9 +67,6 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { void LetterBox(FDMat* mat); - // whether resize after image load, may have an impact on map, default false - bool resize_after_load_; - // target size, tuple of (width, height), default size = {640, 640} std::vector<int> size_; diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index 03e223e82fc..7b1574401fb 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -36,7 +36,7 @@ void BindYOLOv5(pybind11::module& m) { }) .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, &vision::detection::YOLOv5Preprocessor::SetSize) .def_property("padding_value", &vision::detection::YOLOv5Preprocessor::GetPaddingValue, &vision::detection::YOLOv5Preprocessor::SetPaddingValue) - .def_property("resize_after_load", &vision::detection::YOLOv5Preprocessor::GetResizeAfterLoad, &vision::detection::YOLOv5Preprocessor::SetResizeAfterLoad); + .def_property("is_scale_up", &vision::detection::YOLOv5Preprocessor::GetScaleUp, &vision::detection::YOLOv5Preprocessor::SetScaleUp); pybind11::class_<vision::detection::YOLOv5Postprocessor>( m, "YOLOv5Postprocessor") diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc index 26d831b0a38..f669d145e4e 100755 --- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc @@ -20,12 +20,11 @@ namespace vision { namespace detection { YOLOv7Preprocessor::YOLOv7Preprocessor() { - resize_after_load_ = false; size_ = {640, 640}; padding_value_ = {114.0, 114.0, 114.0}; is_mini_pad_ = false; is_no_pad_ = false; - is_scale_up_ = false; + is_scale_up_ = true; stride_ = 32; max_wh_ = 7680.0; } @@ -70,20 +69,6 @@ bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output, // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; - // process after image load - if (resize_after_load_) { - double ratio = (size_[0] * 1.0) / std::max(static_cast<double>(mat->Height()), - static_cast<double>(mat->Width())); - if (std::fabs(ratio - 1.0f) > 1e-06) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - } // yolov7's preprocess steps // 1. letterbox // 2. convert_and_permute(swap_rb=true) diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h index ddcb786e599..ff6c6cad55e 100755 --- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h @@ -52,13 +52,14 @@ class FASTDEPLOY_DECL YOLOv7Preprocessor { /// Get padding value, size should be the same as channels std::vector<float> GetPaddingValue() const { return padding_value_; } - /// Set resize_after_load, may have an impact on map, default false - void SetResizeAfterLoad(bool resize_after_load) { - resize_after_load_ = resize_after_load; + /// Set is_scale_up, if is_scale_up is false, the input image can only + /// be zoomed out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { + is_scale_up_ = is_scale_up; } - /// Get resize_after_load, default false - bool GetResizeAfterLoad() const { return resize_after_load_; } + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } protected: bool Preprocess(FDMat* mat, FDTensor* output, @@ -66,9 +67,6 @@ class FASTDEPLOY_DECL YOLOv7Preprocessor { void LetterBox(FDMat* mat); - // whether resize after image load, may have an impact on map, default false - bool resize_after_load_; - // target size, tuple of (width, height), default size = {640, 640} std::vector<int> size_; diff --git a/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc b/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc index 9e3dad22ce7..6899faa9167 100755 --- a/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc @@ -36,7 +36,7 @@ void BindYOLOv7(pybind11::module& m) { }) .def_property("size", &vision::detection::YOLOv7Preprocessor::GetSize, &vision::detection::YOLOv7Preprocessor::SetSize) .def_property("padding_value", &vision::detection::YOLOv7Preprocessor::GetPaddingValue, &vision::detection::YOLOv7Preprocessor::SetPaddingValue) - .def_property("resize_after_load", &vision::detection::YOLOv7Preprocessor::GetResizeAfterLoad, &vision::detection::YOLOv7Preprocessor::SetResizeAfterLoad); + .def_property("is_scale_up", &vision::detection::YOLOv7Preprocessor::GetScaleUp, &vision::detection::YOLOv7Preprocessor::SetScaleUp); pybind11::class_<vision::detection::YOLOv7Postprocessor>( m, "YOLOv7Postprocessor") diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py index e8895083731..4cc68129ff5 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov5.py +++ b/python/fastdeploy/vision/detection/contrib/yolov5.py @@ -48,11 +48,11 @@ def padding_value(self): return self._preprocessor.padding_value @property - def resize_after_load(self): + def is_scale_up(self): """ - resize_after_load for preprocessing, may have an impact on map, default false + is_scale_up for preprocessing, the input image can only be zoomed out, the maximum resize scale cannot exceed 1.0, default true """ - return self._preprocessor.resize_after_load + return self._preprocessor.is_scale_up @size.setter def size(self, wh): @@ -70,12 +70,12 @@ def padding_value(self, value): list), "The value to set `padding_value` must be type of list."
self._preprocessor.padding_value = value - @resize_after_load.setter - def resize_after_load(self, value): + @is_scale_up.setter + def is_scale_up(self, value): assert isinstance( value, - bool), "The value to set `resize_after_load` must be type of bool." - self._preprocessor.resize_after_load = value + bool), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value class YOLOv5Postprocessor: diff --git a/python/fastdeploy/vision/detection/contrib/yolov7.py b/python/fastdeploy/vision/detection/contrib/yolov7.py index 8b7ef44305a..510b72ed653 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov7.py +++ b/python/fastdeploy/vision/detection/contrib/yolov7.py @@ -48,11 +48,11 @@ def padding_value(self): return self._preprocessor.padding_value @property - def resize_after_load(self): + def is_scale_up(self): """ - resize_after_load for preprocessing, may have an impact on map, default false + is_scale_up for preprocessing, the input image can only be zoomed out, the maximum resize scale cannot exceed 1.0, default true """ - return self._preprocessor.resize_after_load + return self._preprocessor.is_scale_up @size.setter def size(self, wh): @@ -70,12 +70,12 @@ def padding_value(self, value): list), "The value to set `padding_value` must be type of list." self._preprocessor.padding_value = value - @resize_after_load.setter - def resize_after_load(self, value): + @is_scale_up.setter + def is_scale_up(self, value): assert isinstance( value, - bool), "The value to set `resize_after_load` must be type of bool." - self._preprocessor.resize_after_load = value + bool), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value class YOLOv7Postprocessor: From bb1b8fc66c48452ce3332bbd7503a3f5a75ac0f7 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 17 Nov 2022 09:07:25 +0000 Subject: [PATCH 43/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc | 2 +- fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 6b1b6f82193..846e2513163 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -50,7 +50,7 @@ void YOLOv5Preprocessor::LetterBox(FDMat* mat) { resize_h = size_[1]; resize_w = size_[0]; } - if (resize_h != mat->Height() || resize_w != mat->Width()) { + if (std::fabs(scale - 1.0f) > 1e-06) { Resize::Run(mat, resize_w, resize_h); } if (pad_h > 0 || pad_w > 0) { diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc index f669d145e4e..91e22f32b4b 100755 --- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc @@ -50,7 +50,7 @@ void YOLOv7Preprocessor::LetterBox(FDMat* mat) { resize_h = size_[1]; resize_w = size_[0]; } - if (resize_h != mat->Height() || resize_w != mat->Width()) { + if (std::fabs(scale - 1.0f) > 1e-06) { Resize::Run(mat, resize_w, resize_h); } if (pad_h > 0 || pad_w > 0) { From 5f7b9359d00fb7dff90cddb6e517f40b8a02f224 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 17 Nov 2022 09:40:51 +0000 Subject: [PATCH 44/50] set multi_label true --- fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc | 2 +-
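
(Aside: after patches 42-44, is_scale_up replaces resize_after_load and defaults to true, and YOLOv5's multi_label defaults to true. A sketch of toggling them — the model path is hypothetical, and the multi_label setter mirrors the property shown earlier:)

```python
import fastdeploy as fd

model = fd.vision.detection.YOLOv5("yolov5s.onnx")  # hypothetical path

# is_scale_up now defaults to True, so letterboxing may enlarge small
# inputs; set False to only ever shrink images (max scale 1.0).
model.preprocessor.is_scale_up = False

# multi_label now defaults to True (eval-style output); disable it to
# keep a single label per box.
model.postprocessor.multi_label = False
```
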
fastdeploy/vision/detection/contrib/yolov5/postprocessor.h | 4 ++-- python/fastdeploy/vision/detection/contrib/yolov5.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 0366fcce006..4fe01dfeb86 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -22,7 +22,7 @@ namespace detection { YOLOv5Postprocessor::YOLOv5Postprocessor() { conf_threshold_ = 0.25; nms_threshold_ = 0.5; - multi_label_ = false; + multi_label_ = true; max_wh_ = 7680.0; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index c0ccf58de96..88f9400fa20 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -55,12 +55,12 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { /// Get nms_threshold, default 0.5 float GetNMSThreshold() const { return nms_threshold_; } - /// Set multi_label, set true for eval, default false + /// Set multi_label, set true for eval, default true void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } - /// Get multi_label, default false + /// Get multi_label, default true bool GetMultiLabel() const { return multi_label_; } protected: diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py index 4cc68129ff5..b8113f3b83d 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov5.py +++ b/python/fastdeploy/vision/detection/contrib/yolov5.py @@ -110,7 +110,7 @@ def nms_threshold(self): @property def multi_label(self): """ - multi_label for postprocessing, set true for eval, default is false + multi_label for postprocessing, set true for eval, default is true """ return self._postprocessor.multi_label From 845f7a673e87a7fbbb3c97af41954a34e35688ea Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Tue, 22 Nov 2022 09:14:18 +0000 Subject: [PATCH 45/50] optimize rvm preprocess --- fastdeploy/vision/matting/contrib/rvm.cc | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc index 846db6bd60e..7144a9018f7 100755 --- a/fastdeploy/vision/matting/contrib/rvm.cc +++ b/fastdeploy/vision/matting/contrib/rvm.cc @@ -63,20 +63,16 @@ bool RobustVideoMatting::Preprocess( if (resize_h != mat->Height() || resize_w != mat->Width()) { Resize::Run(mat, resize_w, resize_h); } - BGR2RGB::Run(mat); - - // Normalize + // Convert_and_permute(swap_rb=true) std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; std::vector<float> beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); + ConvertAndPermute::Run(mat, alpha, beta, true); + // Record output shape of preprocessed image (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->ExpandDim(0); // reshape to n, c, h, w return true; } @@ -120,8 +116,6 @@ bool RobustVideoMatting::Postprocess( // for alpha float* alpha_ptr = static_cast<float*>(alpha.Data()); - // cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); - // Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy.
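
(Aside: in numpy terms, the fused ConvertAndPermute step above is equivalent to the BGR2RGB + Convert + HWC2CHW + Cast chain it replaces. An illustration only — this is not the FastDeploy API:)

```python
import numpy as np

def convert_and_permute(bgr_hwc, alpha, beta, swap_rb=True):
    """Rough numpy equivalent of ConvertAndPermute::Run on a uint8 HWC image."""
    x = bgr_hwc.astype(np.float32)
    if swap_rb:
        x = x[:, :, ::-1]  # BGR -> RGB
    x = x * np.asarray(alpha, np.float32) + np.asarray(beta, np.float32)
    return np.ascontiguousarray(x.transpose(2, 0, 1))  # HWC -> CHW

chw = convert_and_permute(
    np.zeros((480, 640, 3), np.uint8), alpha=[1 / 255.0] * 3, beta=[0.0] * 3)
print(chw.shape)  # (3, 480, 640)
```
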
Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, alpha_ptr); // ref-only, zero copy. if ((out_h != in_h) || (out_w != in_w)) { @@ -130,8 +124,6 @@ bool RobustVideoMatting::Postprocess( // for foreground float* fgr_ptr = static_cast<float*>(fgr.Data()); - // cv::Mat fgr_zero_copy_ref(out_h, out_w, CV_32FC1, fgr_ptr); - // Mat fgr_resized(fgr_zero_copy_ref); // ref-only, zero copy. Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, fgr_ptr); // ref-only, zero copy. if ((out_h != in_h) || (out_w != in_w)) { From 8489624210828c8e71a97cb4679c30d7f5ab6726 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 23 Nov 2022 09:53:01 +0000 Subject: [PATCH 46/50] optimize rvm postprocess --- .../common/processors/convert_and_permute.cc | 2 +- .../common/processors/normalize_and_permute.cc | 2 +- fastdeploy/vision/common/result.cc | 8 ++++++-- fastdeploy/vision/matting/contrib/rvm.cc | 5 +++-- fastdeploy/vision/matting/contrib/rvm.h | 3 +++ fastdeploy/vision/matting/contrib/rvm_pybind.cc | 3 ++- python/fastdeploy/vision/matting/contrib/rvm.py | 16 ++++++++++++++++ tests/models/test_rvm.py | 1 + 8 files changed, 33 insertions(+), 7 deletions(-) mode change 100644 => 100755 fastdeploy/vision/common/processors/normalize_and_permute.cc mode change 100644 => 100755 tests/models/test_rvm.py diff --git a/fastdeploy/vision/common/processors/convert_and_permute.cc b/fastdeploy/vision/common/processors/convert_and_permute.cc index 73cbb5b48f8..e37bf88cfdd 100644 --- a/fastdeploy/vision/common/processors/convert_and_permute.cc +++ b/fastdeploy/vision/common/processors/convert_and_permute.cc @@ -43,7 +43,7 @@ bool ConvertAndPermute::ImplByOpenCV(FDMat* mat) { for (int i = 0; i < im->channels(); ++i) { cv::extractChannel(split_im[i], cv::Mat(origin_h, origin_w, CV_32FC1, - res.ptr() + i * origin_h * origin_w * 4), + res.ptr() + i * origin_h * origin_w * FDDataTypeSize(mat->Type())), 0); } diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.cc b/fastdeploy/vision/common/processors/normalize_and_permute.cc old mode 100644 new mode 100755 index 93850b97fbf..9484c98d679 --- a/fastdeploy/vision/common/processors/normalize_and_permute.cc +++ b/fastdeploy/vision/common/processors/normalize_and_permute.cc @@ -70,7 +70,7 @@ bool NormalizeAndPermute::ImplByOpenCV(Mat* mat) { for (int i = 0; i < im->channels(); ++i) { cv::extractChannel(split_im[i], cv::Mat(origin_h, origin_w, CV_32FC1, - res.ptr() + i * origin_h * origin_w * 4), + res.ptr() + i * origin_h * origin_w * FDDataTypeSize(mat->Type())), 0); } mat->SetMat(res); diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index ee137604891..f3b24c09eab 100755 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -395,12 +395,16 @@ void MattingResult::Reserve(int size) { } void MattingResult::Resize(int size) { - alpha.resize(size); + if (alpha.capacity() < size) { + alpha.resize(size); + } if (contain_foreground) { FDASSERT((shape.size() == 3), "Please initial shape (h,w,c) before call Resize."); int c = static_cast<int>(shape[2]); - foreground.resize(size * c); + if (foreground.capacity() < size * c) { + foreground.resize(size * c); + } } } diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc index 7144a9018f7..ea37402b044 100755 --- a/fastdeploy/vision/matting/contrib/rvm.cc +++ b/fastdeploy/vision/matting/contrib/rvm.cc @@ -47,6 +47,8 @@ bool RobustVideoMatting::Initialize() { video_mode = true; + swap_rb = true; + if
(!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; @@ -66,7 +68,7 @@ bool RobustVideoMatting::Preprocess( // Convert_and_permute(swap_rb=true) std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; std::vector<float> beta = {0.0f, 0.0f, 0.0f}; - ConvertAndPermute::Run(mat, alpha, beta, true); + ConvertAndPermute::Run(mat, alpha, beta, swap_rb); // Record output shape of preprocessed image (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; @@ -130,7 +132,6 @@ bool RobustVideoMatting::Postprocess( Resize::Run(&fgr_resized, in_w, in_h, -1, -1); } - result->Clear(); result->contain_foreground = true; // if contain_foreground == true, shape must be set to (h, w, c) result->shape = {static_cast<int64_t>(in_h), static_cast<int64_t>(in_w), 3}; diff --git a/fastdeploy/vision/matting/contrib/rvm.h b/fastdeploy/vision/matting/contrib/rvm.h index 58c64ac3b16..3f842401bcf 100755 --- a/fastdeploy/vision/matting/contrib/rvm.h +++ b/fastdeploy/vision/matting/contrib/rvm.h @@ -58,6 +58,9 @@ class FASTDEPLOY_DECL RobustVideoMatting : public FastDeployModel { /// Whether to open the video mode, if there are some irrelevant pictures, set it to false, the default is true // NOLINT bool video_mode; + /// Whether to convert to RGB. Set to false if you have already converted YUV format images to RGB outside the model, default true // NOLINT + bool swap_rb; + private: bool Initialize(); /// Preprocess an input image, and set the preprocessed results to `outputs` diff --git a/fastdeploy/vision/matting/contrib/rvm_pybind.cc b/fastdeploy/vision/matting/contrib/rvm_pybind.cc index a45816d65b5..25d95f51943 100755 --- a/fastdeploy/vision/matting/contrib/rvm_pybind.cc +++ b/fastdeploy/vision/matting/contrib/rvm_pybind.cc @@ -28,7 +28,8 @@ void BindRobustVideoMatting(pybind11::module& m) { return res; }) .def_readwrite("size", &vision::matting::RobustVideoMatting::size) - .def_readwrite("video_mode", &vision::matting::RobustVideoMatting::video_mode); + .def_readwrite("video_mode", &vision::matting::RobustVideoMatting::video_mode) + .def_readwrite("swap_rb", &vision::matting::RobustVideoMatting::swap_rb); } } // namespace fastdeploy diff --git a/python/fastdeploy/vision/matting/contrib/rvm.py b/python/fastdeploy/vision/matting/contrib/rvm.py index 144a3823cdc..174719eae27 100755 --- a/python/fastdeploy/vision/matting/contrib/rvm.py +++ b/python/fastdeploy/vision/matting/contrib/rvm.py @@ -59,6 +59,13 @@ def video_mode(self): """ return self._model.video_mode + @property + def swap_rb(self): + """ + Whether to convert to RGB. Set to false if you have already converted YUV format images to RGB outside the model, default true + """ + return self._model.swap_rb + @size.setter def size(self, wh): """ @@ -79,3 +86,12 @@ def video_mode(self, value): assert isinstance( value, bool), "The value to set `video_mode` must be type of bool." self._model.video_mode = value + + @swap_rb.setter + def swap_rb(self, value): + """ + Set swap_rb property, the default is true + """ + assert isinstance( + value, bool), "The value to set `swap_rb` must be type of bool."
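
(Aside — the setter body continues right after this note. A usage sketch of the two RobustVideoMatting flags added in this patch; the model path matches the test below but is otherwise a placeholder:)

```python
import fastdeploy as fd

model = fd.vision.matting.RobustVideoMatting(
    "resources/rvm/rvm_mobilenetv3_fp32.onnx")

# Feed unrelated single images rather than consecutive video frames.
model.video_mode = False

# Frames already converted to RGB (e.g. from YUV) outside the model?
# Skip the internal BGR->RGB swap.
model.swap_rb = False
```
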
+ self._model.swap_rb = value diff --git a/tests/models/test_rvm.py b/tests/models/test_rvm.py old mode 100644 new mode 100755 index 4fa3083e59a..c57b3f29d3e --- a/tests/models/test_rvm.py +++ b/tests/models/test_rvm.py @@ -27,6 +27,7 @@ def test_matting_rvm_cpu(): fd.download(input_url, "resources") model_path = "resources/rvm/rvm_mobilenetv3_fp32.onnx" # use ORT + rc.test_option.use_ort_backend() model = fd.vision.matting.RobustVideoMatting( model_path, runtime_option=rc.test_option) From f1d23c801471d9b5f10b3a17eeed5f125ccc7403 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 23 Nov 2022 11:20:31 +0000 Subject: [PATCH 47/50] fixed bug --- fastdeploy/vision/common/processors/convert_and_permute.cc | 2 +- fastdeploy/vision/common/processors/normalize_and_permute.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/vision/common/processors/convert_and_permute.cc b/fastdeploy/vision/common/processors/convert_and_permute.cc index e37bf88cfdd..73cbb5b48f8 100644 --- a/fastdeploy/vision/common/processors/convert_and_permute.cc +++ b/fastdeploy/vision/common/processors/convert_and_permute.cc @@ -43,7 +43,7 @@ bool ConvertAndPermute::ImplByOpenCV(FDMat* mat) { for (int i = 0; i < im->channels(); ++i) { cv::extractChannel(split_im[i], cv::Mat(origin_h, origin_w, CV_32FC1, - res.ptr() + i * origin_h * origin_w * FDDataTypeSize(mat->Type())), + res.ptr() + i * origin_h * origin_w * 4), 0); } diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.cc b/fastdeploy/vision/common/processors/normalize_and_permute.cc index 9484c98d679..93850b97fbf 100755 --- a/fastdeploy/vision/common/processors/normalize_and_permute.cc +++ b/fastdeploy/vision/common/processors/normalize_and_permute.cc @@ -70,7 +70,7 @@ bool NormalizeAndPermute::ImplByOpenCV(Mat* mat) { for (int i = 0; i < im->channels(); ++i) { cv::extractChannel(split_im[i], cv::Mat(origin_h, origin_w, CV_32FC1, - res.ptr() + i * origin_h * origin_w * FDDataTypeSize(mat->Type())), + res.ptr() + i * origin_h * origin_w * 4), 0); } mat->SetMat(res); From 4ca1a7e7eb6ccb4fe2222d0927b8fdf7af1031b9 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Fri, 25 Nov 2022 02:31:13 +0000 Subject: [PATCH 48/50] deal with comments --- fastdeploy/vision/common/result.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index f3b24c09eab..ee137604891 100755 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -395,16 +395,12 @@ void MattingResult::Reserve(int size) { } void MattingResult::Resize(int size) { - if (alpha.capacity() < size) { - alpha.resize(size); - } + alpha.resize(size); if (contain_foreground) { FDASSERT((shape.size() == 3), "Please initial shape (h,w,c) before call Resize."); int c = static_cast<int>(shape[2]); - if (foreground.capacity() < size * c) { - foreground.resize(size * c); - } + foreground.resize(size * c); } } From 44d5ae720b875ffba8042941fd2e9c935ddd4559 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sat, 3 Dec 2022 11:17:41 +0000 Subject: [PATCH 49/50] fixed bugs --- .../paddledetection/cpp/infer_picodet.cc | 15 ++++++--------- .../paddledetection/cpp/infer_ppyoloe.cc | 6 +++--- 2 files changed, 9 insertions(+), 12 deletions(-) mode change 100644 => 100755 examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc diff --git a/examples/vision/detection/paddledetection/cpp/infer_picodet.cc b/examples/vision/detection/paddledetection/cpp/infer_picodet.cc index
9ecd49e023a..9e71d88c4d9 100644 --- a/examples/vision/detection/paddledetection/cpp/infer_picodet.cc +++ b/examples/vision/detection/paddledetection/cpp/infer_picodet.cc @@ -34,16 +34,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5); + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -63,16 +62,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5); + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -93,16 +91,15 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5); + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc b/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc old mode 100644 new mode 100755 index 4559179387f..7ac11f23387 --- a/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc +++ b/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc @@ -36,7 +36,7 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) { auto im = cv::imread(image_file); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } @@ -64,7 +64,7 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) { auto im = cv::imread(image_file); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } @@ -93,7 +93,7 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) { auto im = cv::imread(image_file); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." 
<< std::endl; return; } From ed617b984d1d56acef229c38195b742a23341795 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sat, 3 Dec 2022 12:05:11 +0000 Subject: [PATCH 50/50] add gpu ov for benchmark --- benchmark/benchmark_ppcls.py | 5 +++++ benchmark/benchmark_ppdet.py | 11 +++++++++++ benchmark/benchmark_ppseg.py | 5 +++++ benchmark/benchmark_yolo.py | 5 +++++ fastdeploy/backends/openvino/ov_backend.cc | 4 ++-- 5 files changed, 28 insertions(+), 2 deletions(-) mode change 100644 => 100755 fastdeploy/backends/openvino/ov_backend.cc diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py index 8eeeb8cfca0..b4cbcd8c66f 100755 --- a/benchmark/benchmark_ppcls.py +++ b/benchmark/benchmark_ppcls.py @@ -75,6 +75,11 @@ def build_option(args): option.use_ort_backend() elif backend == "paddle": option.use_paddle_backend() + elif backend == "ov": + option.use_openvino_backend() + option.set_openvino_device(name="GPU") + # change name and shape for models + option.set_openvino_shape_info({"x": [1, 3, 224, 224]}) elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index 6d08aafb8a8..1a2297b4f82 100755 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -75,6 +75,17 @@ def build_option(args): option.use_ort_backend() elif backend == "paddle": option.use_paddle_backend() + elif backend == "ov": + option.use_openvino_backend() + # Using GPU and CPU heterogeneous execution mode + option.set_openvino_device("HETERO:GPU,CPU") + # change name and shape for models + option.set_openvino_shape_info({ + "image": [1, 3, 320, 320], + "scale_factor": [1, 2] + }) + # Set CPU up operator + option.set_openvino_cpu_operators(["MulticlassNms"]) elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py index 7d9df9f0778..b146510d614 100755 --- a/benchmark/benchmark_ppseg.py +++ b/benchmark/benchmark_ppseg.py @@ -75,6 +75,11 @@ def build_option(args): option.use_ort_backend() elif backend == "paddle": option.use_paddle_backend() + elif backend == "ov": + option.use_openvino_backend() + option.set_openvino_device(name="GPU") # use gpu + # change name and shape for models + option.set_openvino_shape_info({"x": [1, 3, 512, 512]}) elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py index dd63cefb65a..a90bcab3de3 100755 --- a/benchmark/benchmark_yolo.py +++ b/benchmark/benchmark_yolo.py @@ -75,6 +75,11 @@ def build_option(args): option.use_ort_backend() elif backend == "paddle": option.use_paddle_backend() + elif backend == "ov": + option.use_openvino_backend() + option.set_openvino_device(name="GPU") + # change name and shape for models + option.set_openvino_shape_info({"images": [1, 3, 640, 640]}) elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": diff --git a/fastdeploy/backends/openvino/ov_backend.cc b/fastdeploy/backends/openvino/ov_backend.cc old mode 100644 new mode 100755 index 9e8c2571aba..6858f85471c --- a/fastdeploy/backends/openvino/ov_backend.cc +++ b/fastdeploy/backends/openvino/ov_backend.cc @@ -176,7 +176,7 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file, } ov::AnyMap properties; - if (option_.cpu_thread_num > 0) { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { 
properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; } if (option_.device == "CPU") { @@ -306,7 +306,7 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file, } ov::AnyMap properties; - if (option_.cpu_thread_num > 0) { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; } if (option_.device == "CPU") {