From 018b9025e9c2b63b3c190c65f66a2d83c4af6442 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Tue, 1 Nov 2022 12:04:12 +0000 Subject: [PATCH 01/50] add paddle_trt in benchmark --- benchmark/README.md | 28 ++++++++++++++--- benchmark/benchmark_ppcls.py | 52 +++++++++++++++++++------------ benchmark/benchmark_ppdet.py | 52 +++++++++++++++++++------------ benchmark/benchmark_ppseg.py | 52 +++++++++++++++++++------------ benchmark/benchmark_yolo.py | 53 +++++++++++++++++++------------- benchmark/convert_info.py | 48 ++++++++++++++++++++++------- benchmark/requirements.txt | 2 -- benchmark/run_benchmark_ppcls.sh | 24 ++++++++------- benchmark/run_benchmark_ppdet.sh | 24 ++++++++------- benchmark/run_benchmark_ppseg.sh | 24 ++++++++------- benchmark/run_benchmark_yolo.sh | 16 +++++----- 11 files changed, 236 insertions(+), 139 deletions(-) diff --git a/benchmark/README.md b/benchmark/README.md index f01214ee65f..b1f96c1bea3 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -2,7 +2,7 @@ 在跑benchmark前,需确认以下两个步骤 -* 1. 软硬件环境满足要求,参考[FastDeploy环境要求](..//docs/cn/build_and_install/download_prebuilt_libraries.md) +* 1. 软硬件环境满足要求,参考[FastDeploy环境要求](..//docs/cn/build_and_install/download_prebuilt_libraries.md) * 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../docs/cn/build_and_install/download_prebuilt_libraries.md) FastDeploy 目前支持多种推理后端,下面以 PaddleClas MobileNetV1 为例,跑出多后端在 CPU/GPU 对应 benchmark 数据 @@ -29,6 +29,12 @@ python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val # Paddle Inference python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle +# Paddle Inference + TensorRT +python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt + +# Paddle Inference + TensorRT fp16 +python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_trt_fp16 True + # ONNX Runtime python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort @@ -50,8 +56,9 @@ python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val | --cpu_num_thread | CPU 线程数 | | --device_id | GPU 卡号 | | --iter_num | 跑 benchmark 的迭代次数 | -| --backend | 指定后端类型,有ort, ov, trt, paddle四个选项 | -| --enable_trt_fp16 | 当后端为trt时,是否开启fp16 | +| --backend | 指定后端类型,有ort, ov, trt, paddle, paddle_trt 五个选项 | +| --enable_trt_fp16 | 当后端为trt或paddle_trt时,是否开启fp16 | +| --enable_collect_memory_info | 是否记录 cpu/gpu memory信息,默认 False | **最终txt结果** @@ -62,7 +69,7 @@ python benchmark_ppcls.py --model MobileNetV1_x0_25_infer --image ILSVRC2012_val cat *.txt >> ./result_ppcls.txt # 结构化信息 -python convert_info.py --txt_path result_ppcls.txt --domain ppcls +python convert_info.py --txt_path result_ppcls.txt --domain ppcls --enable_collect_memory_info True ``` 得到 CPU 结果```struct_cpu_ppcls.txt```以及 GPU 结果```struct_gpu_ppcls.txt```如下所示 @@ -89,3 +96,16 @@ sh run_benchmark_ppcls.sh ``` 一键得到所有模型在 CPU 以及 GPU 的 benchmark 数据 + + +**添加新设备** + +如果添加了一种新设备,想进行 benchmark 测试,以```ipu```为例 + +在对应 benchmark 脚本```--device```中加入```ipu```选项,并通过```option.use_ipu()```进行开启 + +输入下列命令,进行 benchmark 测试 + +```shell +python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --iter_num 2000 --backend paddle --device ipu +``` diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py 
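The "添加新设备" (add a new device) steps in the README above can be sketched in code. A minimal illustration, assuming a `build_option()` helper shaped like the ones in the benchmark scripts of this patch; the `option.use_ipu()` switch and the paddle-backend pairing are taken from the README example itself:

```python
import fastdeploy as fd

# Minimal sketch of wiring an `ipu` device choice into build_option();
# device/backend names mirror the benchmark scripts in this patch.
def build_option(device, backend, cpu_num_thread=1):
    option = fd.RuntimeOption()
    option.set_cpu_thread_num(cpu_num_thread)
    if device == "ipu":
        # The README example pairs --device ipu with --backend paddle.
        assert backend == "paddle", "the ipu device requires backend==paddle"
        option.use_ipu()
        option.use_paddle_backend()
    return option
```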
index 410f20e4129..5b05bfe6687 100644 --- a/benchmark/benchmark_ppcls.py +++ b/benchmark/benchmark_ppcls.py @@ -51,12 +51,17 @@ def parse_arguments(): "--backend", type=str, default="ort", - help="inference backend, ort, ov, trt, paddle.") + help="inference backend, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", type=bool, default=False, help="whether enable fp16 in trt backend") + parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() return args @@ -69,9 +74,11 @@ def build_option(args): if device == "gpu": option.use_gpu(args.device_id) - if backend == "trt": + if backend == "trt" or backend == "paddle_trt": assert device == "gpu", "the trt backend need device==gpu" option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() if args.enable_trt_fp16: option.enable_trt_fp16() elif backend == "ov": @@ -130,45 +137,50 @@ def get_current_gputil(gpu_id): else: file_path = args.model + "_model_" + args.backend + "_" + args.device + ".txt" f = open(file_path, "w") - f.writelines("===={}====: \n".format(file_path.split("/")[1][:-4])) + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) try: model = fd.vision.classification.PaddleClasModel( model_file, params_file, config_file, runtime_option=option) model.enable_record_time_of_runtime() + im_ori = cv2.imread(args.image) for i in range(args.iter_num): - im = cv2.imread(args.image) + im = im_ori start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - gpu_util.append(get_current_gputil(gpu_id)) - cm, gm = get_current_memory_mb(gpu_id) - cpu_mem.append(cm) - gpu_mem.append(gm) + if args.enable_collect_memory_info: + gpu_util.append(get_current_gputil(gpu_id)) + cm, gm = get_current_memory_mb(gpu_id) + cpu_mem.append(cm) + gpu_mem.append(gm) runtime_statis = model.print_statis_info_of_runtime() warmup_iter = args.iter_num // 5 - repeat_iter = args.iter_num - warmup_iter end2end_statis_repeat = end2end_statis[warmup_iter:] - cpu_mem_repeat = cpu_mem[warmup_iter:] - gpu_mem_repeat = gpu_mem[warmup_iter:] - gpu_util_repeat = gpu_util[warmup_iter:] + if args.enable_collect_memory_info: + cpu_mem_repeat = cpu_mem[warmup_iter:] + gpu_mem_repeat = gpu_mem[warmup_iter:] + gpu_util_repeat = gpu_util[warmup_iter:] dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) - dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) - dump_result["gpu_util"] = np.mean(gpu_util_repeat) + if args.enable_collect_memory_info: + dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) + dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) + dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - f.writelines("cpu_rss_mb: {} \n".format( - str(dump_result["cpu_rss_mb"]))) - f.writelines("gpu_rss_mb: {} \n".format( - str(dump_result["gpu_rss_mb"]))) - f.writelines("gpu_util: {} \n".format(str(dump_result["gpu_util"]))) + if args.enable_collect_memory_info: + f.writelines("cpu_rss_mb: {} \n".format( + str(dump_result["cpu_rss_mb"]))) + f.writelines("gpu_rss_mb: {} \n".format( + str(dump_result["gpu_rss_mb"]))) + f.writelines("gpu_util: {} \n".format( + str(dump_result["gpu_util"]))) except: 
f.writelines("!!!!!Infer Failed\n") diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index 6b2f946f946..2f192de065b 100644 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -53,12 +53,17 @@ def parse_arguments(): "--backend", type=str, default="ort", - help="inference backend, ort, ov, trt, paddle.") + help="inference backend, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", type=bool, default=False, help="whether enable fp16 in trt backend") + parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() return args @@ -71,9 +76,11 @@ def build_option(args): if device == "gpu": option.use_gpu(args.device_id) - if backend == "trt": + if backend == "trt" or backend == "paddle_trt": assert device == "gpu", "the trt backend need device==gpu" option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() if args.enable_trt_fp16: option.enable_trt_fp16() elif backend == "ov": @@ -131,7 +138,7 @@ def get_current_gputil(gpu_id): else: file_path = args.model + "_model_" + args.backend + "_" + args.device + ".txt" f = open(file_path, "w") - f.writelines("===={}====: \n".format(file_path.split("/")[1][:-4])) + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) try: if "ppyoloe" in args.model: @@ -156,39 +163,44 @@ def get_current_gputil(gpu_id): raise Exception("model {} not support now in ppdet series".format( args.model)) model.enable_record_time_of_runtime() + im_ori = cv2.imread(args.image) for i in range(args.iter_num): - im = cv2.imread(args.image) + im = im_ori start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - gpu_util.append(get_current_gputil(gpu_id)) - cm, gm = get_current_memory_mb(gpu_id) - cpu_mem.append(cm) - gpu_mem.append(gm) + if args.enable_collect_memory_info: + gpu_util.append(get_current_gputil(gpu_id)) + cm, gm = get_current_memory_mb(gpu_id) + cpu_mem.append(cm) + gpu_mem.append(gm) runtime_statis = model.print_statis_info_of_runtime() warmup_iter = args.iter_num // 5 - repeat_iter = args.iter_num - warmup_iter end2end_statis_repeat = end2end_statis[warmup_iter:] - cpu_mem_repeat = cpu_mem[warmup_iter:] - gpu_mem_repeat = gpu_mem[warmup_iter:] - gpu_util_repeat = gpu_util[warmup_iter:] + if args.enable_collect_memory_info: + cpu_mem_repeat = cpu_mem[warmup_iter:] + gpu_mem_repeat = gpu_mem[warmup_iter:] + gpu_util_repeat = gpu_util[warmup_iter:] dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) - dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) - dump_result["gpu_util"] = np.mean(gpu_util_repeat) + if args.enable_collect_memory_info: + dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) + dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) + dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - f.writelines("cpu_rss_mb: {} \n".format( - str(dump_result["cpu_rss_mb"]))) - f.writelines("gpu_rss_mb: {} \n".format( - str(dump_result["gpu_rss_mb"]))) - f.writelines("gpu_util: {} \n".format(str(dump_result["gpu_util"]))) + if args.enable_collect_memory_info: + f.writelines("cpu_rss_mb: {} \n".format( + str(dump_result["cpu_rss_mb"]))) + 
f.writelines("gpu_rss_mb: {} \n".format( + str(dump_result["gpu_rss_mb"]))) + f.writelines("gpu_util: {} \n".format( + str(dump_result["gpu_util"]))) except: f.writelines("!!!!!Infer Failed\n") diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py index 7c118cec525..81e0db797aa 100644 --- a/benchmark/benchmark_ppseg.py +++ b/benchmark/benchmark_ppseg.py @@ -51,12 +51,17 @@ def parse_arguments(): "--backend", type=str, default="ort", - help="inference backend, ort, ov, trt, paddle.") + help="inference backend, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", type=bool, default=False, help="whether enable fp16 in trt backend") + parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() return args @@ -69,9 +74,11 @@ def build_option(args): if device == "gpu": option.use_gpu(args.device_id) - if backend == "trt": + if backend == "trt" or backend == "paddle_trt": assert device == "gpu", "the trt backend need device==gpu" option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() if args.enable_trt_fp16: option.enable_trt_fp16() elif backend == "ov": @@ -129,45 +136,50 @@ def get_current_gputil(gpu_id): else: file_path = args.model + "_model_" + args.backend + "_" + args.device + ".txt" f = open(file_path, "w") - f.writelines("===={}====: \n".format(file_path.split("/")[1][:-4])) + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) try: model = fd.vision.segmentation.PaddleSegModel( model_file, params_file, config_file, runtime_option=option) model.enable_record_time_of_runtime() + im_ori = cv2.imread(args.image) for i in range(args.iter_num): - im = cv2.imread(args.image) + im = im_ori start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - gpu_util.append(get_current_gputil(gpu_id)) - cm, gm = get_current_memory_mb(gpu_id) - cpu_mem.append(cm) - gpu_mem.append(gm) + if args.enable_collect_memory_info: + gpu_util.append(get_current_gputil(gpu_id)) + cm, gm = get_current_memory_mb(gpu_id) + cpu_mem.append(cm) + gpu_mem.append(gm) runtime_statis = model.print_statis_info_of_runtime() warmup_iter = args.iter_num // 5 - repeat_iter = args.iter_num - warmup_iter end2end_statis_repeat = end2end_statis[warmup_iter:] - cpu_mem_repeat = cpu_mem[warmup_iter:] - gpu_mem_repeat = gpu_mem[warmup_iter:] - gpu_util_repeat = gpu_util[warmup_iter:] + if args.enable_collect_memory_info: + cpu_mem_repeat = cpu_mem[warmup_iter:] + gpu_mem_repeat = gpu_mem[warmup_iter:] + gpu_util_repeat = gpu_util[warmup_iter:] dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) - dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) - dump_result["gpu_util"] = np.mean(gpu_util_repeat) + if args.enable_collect_memory_info: + dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) + dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) + dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - f.writelines("cpu_rss_mb: {} \n".format( - str(dump_result["cpu_rss_mb"]))) - f.writelines("gpu_rss_mb: {} \n".format( - str(dump_result["gpu_rss_mb"]))) - f.writelines("gpu_util: {} \n".format(str(dump_result["gpu_util"]))) + if 
args.enable_collect_memory_info: + f.writelines("cpu_rss_mb: {} \n".format( + str(dump_result["cpu_rss_mb"]))) + f.writelines("gpu_rss_mb: {} \n".format( + str(dump_result["gpu_rss_mb"]))) + f.writelines("gpu_util: {} \n".format( + str(dump_result["gpu_util"]))) except: f.writelines("!!!!!Infer Failed\n") diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py index 81a87323c04..65e89a516a1 100644 --- a/benchmark/benchmark_yolo.py +++ b/benchmark/benchmark_yolo.py @@ -53,12 +53,17 @@ def parse_arguments(): "--backend", type=str, default="ort", - help="inference backend, ort, ov, trt, paddle.") + help="inference backend, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", type=bool, default=False, help="whether enable fp16 in trt backend") + parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() return args @@ -71,9 +76,11 @@ def build_option(args): if device == "gpu": option.use_gpu(args.device_id) - if backend == "trt": + if backend == "trt" or backend == "paddle_trt": assert device == "gpu", "the trt backend need device==gpu" option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() if args.enable_trt_fp16: option.enable_trt_fp16() elif backend == "ov": @@ -129,7 +136,7 @@ def get_current_gputil(gpu_id): else: file_path = args.model + "_model_" + args.backend + "_" + args.device + ".txt" f = open(file_path, "w") - f.writelines("===={}====: \n".format(file_path.split("/")[1][:-4])) + f.writelines("===={}====: \n".format(os.path.split(file_path)[-1][:-4])) try: if "yolox" in model_file: @@ -148,40 +155,44 @@ def get_current_gputil(gpu_id): raise Exception("model {} not support now in yolo series".format( args.model)) model.enable_record_time_of_runtime() - + im_ori = cv2.imread(args.image) for i in range(args.iter_num): - im = cv2.imread(args.image) + im = im_ori start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - gpu_util.append(get_current_gputil(gpu_id)) - cm, gm = get_current_memory_mb(gpu_id) - cpu_mem.append(cm) - gpu_mem.append(gm) + if args.enable_collect_memory_info: + gpu_util.append(get_current_gputil(gpu_id)) + cm, gm = get_current_memory_mb(gpu_id) + cpu_mem.append(cm) + gpu_mem.append(gm) runtime_statis = model.print_statis_info_of_runtime() warmup_iter = args.iter_num // 5 - repeat_iter = args.iter_num - warmup_iter end2end_statis_repeat = end2end_statis[warmup_iter:] - cpu_mem_repeat = cpu_mem[warmup_iter:] - gpu_mem_repeat = gpu_mem[warmup_iter:] - gpu_util_repeat = gpu_util[warmup_iter:] + if args.enable_collect_memory_info: + cpu_mem_repeat = cpu_mem[warmup_iter:] + gpu_mem_repeat = gpu_mem[warmup_iter:] + gpu_util_repeat = gpu_util[warmup_iter:] dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) - dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) - dump_result["gpu_util"] = np.mean(gpu_util_repeat) + if args.enable_collect_memory_info: + dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) + dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) + dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - f.writelines("cpu_rss_mb: {} \n".format( - str(dump_result["cpu_rss_mb"]))) - 
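# ---------------------------------------------------------------------------
# Illustrative note (not part of this patch): with the guards above, each
# benchmark run writes a small txt report that convert_info.py parses line
# by line. For a GPU run the layout is as below; the three memory lines
# appear only when --enable_collect_memory_info True, and CPU file names
# additionally encode the thread count (e.g. the "ort_cpu_1" key matched by
# convert_info.py):
#
#   ====<model>_model_<backend>_gpu====:
#   Runtime(ms): <float>
#   End2End(ms): <float>
#   cpu_rss_mb: <float>
#   gpu_rss_mb: <float>
#   gpu_util: <float>
# ---------------------------------------------------------------------------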
f.writelines("gpu_rss_mb: {} \n".format( - str(dump_result["gpu_rss_mb"]))) - f.writelines("gpu_util: {} \n".format(str(dump_result["gpu_util"]))) + if args.enable_collect_memory_info: + f.writelines("cpu_rss_mb: {} \n".format( + str(dump_result["cpu_rss_mb"]))) + f.writelines("gpu_rss_mb: {} \n".format( + str(dump_result["gpu_rss_mb"]))) + f.writelines("gpu_util: {} \n".format( + str(dump_result["gpu_util"]))) except: f.writelines("!!!!!Infer Failed\n") diff --git a/benchmark/convert_info.py b/benchmark/convert_info.py index 893ffecbec8..338a0cec582 100644 --- a/benchmark/convert_info.py +++ b/benchmark/convert_info.py @@ -18,9 +18,15 @@ parser = argparse.ArgumentParser(description='manual to this script') parser.add_argument('--txt_path', type=str, default='result.txt') parser.add_argument('--domain', type=str, default='ppcls') +parser.add_argument( + "--enable_collect_memory_info", + type=bool, + default=False, + help="whether enable collect memory info") args = parser.parse_args() txt_path = args.txt_path domain = args.domain +enable_collect_memory_info = args.enable_collect_memory_info f1 = open(txt_path, "r") lines = f1.readlines() @@ -33,6 +39,8 @@ paddle_cpu_thread1 = dict() paddle_cpu_thread8 = dict() paddle_gpu = dict() +paddle_trt_gpu = dict() +paddle_trt_gpu_fp16 = dict() trt_gpu = dict() trt_gpu_fp16 = dict() model_name_set = set() @@ -43,6 +51,8 @@ model_name_set.add(model_name) runtime = "-" end2end = "-" + cpu_rss_mb = "-" + gpu_rss_mb = "-" if "Runtime(ms)" in lines[i + 1]: runtime_ori = lines[i + 1].split(": ")[1] # two decimal places @@ -53,16 +63,17 @@ # two decimal places end2end_list = end2end_ori.split(".") end2end = end2end_list[0] + "." + end2end_list[1][:2] - if "cpu_rss_mb" in lines[i + 3]: - cpu_rss_mb_ori = lines[i + 3].split(": ")[1] - # two decimal places - cpu_rss_mb_list = cpu_rss_mb_ori.split(".") - cpu_rss_mb = cpu_rss_mb_list[0] + "." + cpu_rss_mb_list[1][:2] - if "gpu_rss_mb" in lines[i + 4]: - gpu_rss_mb_ori = lines[i + 4].split(": ")[1] - # two decimal places - gpu_rss_mb_list = gpu_rss_mb_ori.split(".") - gpu_rss_mb = gpu_rss_mb_list[0] + "." + gpu_rss_mb_list[1][:2] + if enable_collect_memory_info: + if "cpu_rss_mb" in lines[i + 3]: + cpu_rss_mb_ori = lines[i + 3].split(": ")[1] + # two decimal places + cpu_rss_mb_list = cpu_rss_mb_ori.split(".") + cpu_rss_mb = cpu_rss_mb_list[0] + "." + cpu_rss_mb_list[1][:2] + if "gpu_rss_mb" in lines[i + 4]: + gpu_rss_mb_ori = lines[i + 4].split(": ")[1] + # two decimal places + gpu_rss_mb_list = gpu_rss_mb_ori.split(".") + gpu_rss_mb = gpu_rss_mb_list[0] + "." 
+ gpu_rss_mb_list[1][:2] if "ort_cpu_1" in lines[i]: ort_cpu_thread1[ model_name] = runtime + "\t" + end2end + "\t" + cpu_rss_mb @@ -86,6 +97,12 @@ elif "paddle_gpu" in lines[i]: paddle_gpu[ model_name] = runtime + "\t" + end2end + "\t" + gpu_rss_mb + elif "paddle_trt_gpu" in lines[i]: + paddle_trt_gpu[ + model_name] = runtime + "\t" + end2end + "\t" + gpu_rss_mb + elif "paddle_trt_fp16_gpu" in lines[i]: + paddle_trt_gpu_fp16[ + model_name] = runtime + "\t" + end2end + "\t" + gpu_rss_mb elif "trt_gpu" in lines[i]: trt_gpu[model_name] = runtime + "\t" + end2end + "\t" + gpu_rss_mb elif "trt_fp16_gpu" in lines[i]: @@ -131,7 +148,7 @@ f3 = open("struct_gpu_" + domain + ".txt", "w") f3.writelines( - "model_name\tort_run\tort_end2end\tgpu_rss_mb\tpaddle_run\tpaddle_end2end\tgpu_rss_mb\ttrt_run\ttrt_end2end\tgpu_rss_mb\ttrt_fp16_run\ttrt_fp16_end2end\tgpu_rss_mb\n" + "model_name\tort_run\tort_end2end\tgpu_rss_mb\tpaddle_run\tpaddle_end2end\tgpu_rss_mb\tpaddle_trt_run\tpaddle_trt_end2end\tgpu_rss_mb\tpaddle_trt_fp16_run\tpaddle_trt_fp16_end2end\tgpu_rss_mb\ttrt_run\ttrt_end2end\tgpu_rss_mb\ttrt_fp16_run\ttrt_fp16_end2end\tgpu_rss_mb\n" ) for model_name in model_name_set: lines1 = model_name + '\t' @@ -143,6 +160,15 @@ lines1 += paddle_gpu[model_name] + '\t' else: lines1 += "-\t-\t-\t" + if model_name in paddle_trt_gpu and paddle_trt_gpu[model_name] != "": + lines1 += paddle_trt_gpu[model_name] + '\t' + else: + lines1 += "-\t-\t-\t" + if model_name in paddle_trt_gpu_fp16 and paddle_trt_gpu_fp16[ + model_name] != "": + lines1 += paddle_trt_gpu_fp16[model_name] + '\t' + else: + lines1 += "-\t-\t-\t" if model_name in trt_gpu and trt_gpu[model_name] != "": lines1 += trt_gpu[model_name] + '\t' else: diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt index 9f1d255ea7b..9d78d39fede 100644 --- a/benchmark/requirements.txt +++ b/benchmark/requirements.txt @@ -2,5 +2,3 @@ numpy pynvml psutil GPUtil -time -numpy diff --git a/benchmark/run_benchmark_ppcls.sh b/benchmark/run_benchmark_ppcls.sh index 71e33a93194..c82c0ac0106 100644 --- a/benchmark/run_benchmark_ppcls.sh +++ b/benchmark/run_benchmark_ppcls.sh @@ -6,16 +6,18 @@ counter=1 for model in $(ls -d ppcls_model/* ) do echo "[Benchmark-PPcls] ${counter}/${num_of_models} $model ..." 
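# ---------------------------------------------------------------------------
# Illustrative note (not part of this patch): the rewritten loop below sweeps
# each model over CPU at 1 and 8 threads (ort/paddle/ov) and over GPU
# (ort/paddle/paddle_trt/trt, the two TRT backends again with
# --enable_trt_fp16 True), now appending --enable_collect_memory_info True
# to every run. The boolean flags must be spelled True/False: patch 03/50
# switches their parsing to ast.literal_eval, which accepts Python literals
# but rejects a lowercase "true". A single hypothetical smoke run before the
# full 2000-iteration sweep could be:
#   python benchmark_ppcls.py --model ppcls_model/MobileNetV1_x0_25_infer \
#       --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 100 \
#       --backend paddle_trt --enable_collect_memory_info True
# ---------------------------------------------------------------------------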
- python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ort - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ort - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend paddle - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend paddle - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ov - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ov - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt - python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_trt_fp16 True --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_collect_memory_info True + python benchmark_ppcls.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True --enable_collect_memory_info True counter=$(($counter+1)) step=$(( $counter % 1 )) if [ $step = 0 ] @@ -30,4 +32,4 @@ rm -rf result_ppcls.txt touch result_ppcls.txt cat ppcls_model/*.txt >> ./result_ppcls.txt -python convert_info.py --txt_path result_ppcls.txt --domain ppcls +python convert_info.py 
--txt_path result_ppcls.txt --domain ppcls --enable_collect_memory_info True diff --git a/benchmark/run_benchmark_ppdet.sh b/benchmark/run_benchmark_ppdet.sh index 55f7de39f20..3b27d350265 100644 --- a/benchmark/run_benchmark_ppdet.sh +++ b/benchmark/run_benchmark_ppdet.sh @@ -6,16 +6,18 @@ counter=1 for model in $(ls -d ppdet_model/* ) do echo "[Benchmark-PPdet] ${counter}/${num_of_models} $model ..." - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ort - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ort - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend paddle - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend paddle - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ov - python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ov - python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend ort - python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend paddle - python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt - python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend paddle_trt --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend paddle_trt --enable_trt_fp16 True --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_collect_memory_info True + python benchmark_ppdet.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True --enable_collect_memory_info True counter=$(($counter+1)) step=$(( $counter % 1 )) if [ $step = 0 ] @@ -30,4 +32,4 @@ rm -rf result_ppdet.txt touch 
result_ppdet.txt cat ppdet_model/*.txt >> ./result_ppdet.txt -python convert_info.py --txt_path result_ppdet.txt --domain ppdet +python convert_info.py --txt_path result_ppdet.txt --domain ppdet --enable_collect_memory_info True diff --git a/benchmark/run_benchmark_ppseg.sh b/benchmark/run_benchmark_ppseg.sh index e878c1529de..1964e80ebc2 100644 --- a/benchmark/run_benchmark_ppseg.sh +++ b/benchmark/run_benchmark_ppseg.sh @@ -6,16 +6,18 @@ counter=1 for model in $(ls -d ppseg_model/* ) do echo "[Benchmark-PPseg] ${counter}/${num_of_models} $model ..." - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ort - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ort - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend paddle - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend paddle - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ov - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ov - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt - python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 1 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --cpu_num_thread 8 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend paddle_trt --enable_trt_fp16 True --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image 
ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_collect_memory_info True + python benchmark_ppseg.py --model $model --image ILSVRC2012_val_00000010.jpeg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True --enable_collect_memory_info True counter=$(($counter+1)) step=$(( $counter % 1 )) if [ $step = 0 ] @@ -30,4 +32,4 @@ rm -rf result_ppseg.txt touch result_ppseg.txt cat ppseg_model/*.txt >> ./result_ppseg.txt -python convert_info.py --txt_path result_ppseg.txt --domain ppseg +python convert_info.py --txt_path result_ppseg.txt --domain ppseg --enable_collect_memory_info True diff --git a/benchmark/run_benchmark_yolo.sh b/benchmark/run_benchmark_yolo.sh index 6f36c41eb57..7ec625fcc4b 100755 --- a/benchmark/run_benchmark_yolo.sh +++ b/benchmark/run_benchmark_yolo.sh @@ -6,13 +6,13 @@ counter=1 for model in $(ls -d yolo_model/* ) do echo "[Benchmark-Yolo] ${counter}/${num_of_models} $model ..." - python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ort - python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ort - python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ov - python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ov - python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend ort - python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt - python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True + python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 1 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --cpu_num_thread 8 --iter_num 2000 --backend ov --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend ort --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_collect_memory_info True + python benchmark_yolo.py --model $model --image 000000014439.jpg --device gpu --iter_num 2000 --backend trt --enable_trt_fp16 True --enable_collect_memory_info True counter=$(($counter+1)) step=$(( $counter % 1 )) if [ $step = 0 ] @@ -27,4 +27,4 @@ rm -rf result_yolo.txt touch result_yolo.txt cat yolo_model/*.txt >> ./result_yolo.txt -python convert_info.py --txt_path result_yolo.txt --domain yolo +python convert_info.py --txt_path result_yolo.txt --domain yolo --enable_collect_memory_info True From 00002346ce625702f43a89ae22837ae54cd175e6 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 2 Nov 2022 12:31:05 +0000 Subject: [PATCH 02/50] update benchmark in device --- benchmark/benchmark_ppcls.py | 80 ++++++++++++++++++++++++------------ benchmark/benchmark_ppdet.py | 80 ++++++++++++++++++++++++------------ benchmark/benchmark_ppseg.py | 80 ++++++++++++++++++++++++------------ 
benchmark/benchmark_yolo.py | 80 ++++++++++++++++++++++++------------ 4 files changed, 216 insertions(+), 104 deletions(-) diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py index 5b05bfe6687..914ace71b03 100644 --- a/benchmark/benchmark_ppcls.py +++ b/benchmark/benchmark_ppcls.py @@ -22,9 +22,19 @@ import time +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() == 'true': + return True + elif v.lower() == 'false': + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def parse_arguments(): import argparse - import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleClas model.") @@ -50,16 +60,16 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - default="ort", - help="inference backend, ort, ov, trt, paddle, paddle_trt.") + default="default", + help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=bool, + type=str2bool, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=bool, + type=str2bool, default=False, help="whether enable collect memory info") args = parser.parse_args() @@ -70,26 +80,43 @@ def build_option(args): option = fd.RuntimeOption() device = args.device backend = args.backend + enable_trt_fp16 = args.enable_trt_fp16 option.set_cpu_thread_num(args.cpu_num_thread) if device == "gpu": - option.use_gpu(args.device_id) - - if backend == "trt" or backend == "paddle_trt": - assert device == "gpu", "the trt backend need device==gpu" - option.use_trt_backend() - if backend == "paddle_trt": - option.enable_paddle_to_trt() - if args.enable_trt_fp16: - option.enable_trt_fp16() - elif backend == "ov": - assert device == "cpu", "the openvino backend need device==cpu" - option.use_openvino_backend() - elif backend == "paddle": - option.use_paddle_backend() - elif backend == "ort": - option.use_ort_backend() + option.use_gpu() + if backend == "ort": + option.use_ort_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend in ["trt", "paddle_trt"]: + option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() + if enable_trt_fp16: + option.enable_trt_fp16() + elif backend == "default": + return option + else: + raise Exception( + "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, {} is not supported.". + format(backend)) + elif device == "cpu": + if backend == "ort": + option.use_ort_backend() + elif backend == "ov": + option.use_openvino_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend == "default": + return option + else: + raise Exception( + "While inference with CPU, only support default/ort/ov/paddle now, {} is not supported.". 
+ format(backend)) else: - print("%s is an unsupported backend" % backend) + raise Exception( + "Only support device CPU/GPU now, {} is not supported.".format( + device)) return option @@ -123,6 +150,7 @@ def get_current_gputil(gpu_id): config_file = os.path.join(args.model, "inference_cls.yaml") gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info end2end_statis = list() cpu_mem = list() gpu_mem = list() @@ -149,7 +177,7 @@ def get_current_gputil(gpu_id): start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - if args.enable_collect_memory_info: + if enable_collect_memory_info: gpu_util.append(get_current_gputil(gpu_id)) cm, gm = get_current_memory_mb(gpu_id) cpu_mem.append(cm) @@ -159,7 +187,7 @@ def get_current_gputil(gpu_id): warmup_iter = args.iter_num // 5 end2end_statis_repeat = end2end_statis[warmup_iter:] - if args.enable_collect_memory_info: + if enable_collect_memory_info: cpu_mem_repeat = cpu_mem[warmup_iter:] gpu_mem_repeat = gpu_mem[warmup_iter:] gpu_util_repeat = gpu_util[warmup_iter:] @@ -167,14 +195,14 @@ def get_current_gputil(gpu_id): dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - if args.enable_collect_memory_info: + if enable_collect_memory_info: dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - if args.enable_collect_memory_info: + if enable_collect_memory_info: f.writelines("cpu_rss_mb: {} \n".format( str(dump_result["cpu_rss_mb"]))) f.writelines("gpu_rss_mb: {} \n".format( diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index 2f192de065b..cb8d47f44ea 100644 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -24,9 +24,19 @@ import time +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() == 'true': + return True + elif v.lower() == 'false': + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def parse_arguments(): import argparse - import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleDetection model.") @@ -52,16 +62,16 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - default="ort", - help="inference backend, ort, ov, trt, paddle, paddle_trt.") + default="default", + help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=bool, + type=str2bool, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=bool, + type=str2bool, default=False, help="whether enable collect memory info") args = parser.parse_args() @@ -72,26 +82,43 @@ def build_option(args): option = fd.RuntimeOption() device = args.device backend = args.backend + enable_trt_fp16 = args.enable_trt_fp16 option.set_cpu_thread_num(args.cpu_num_thread) if device == "gpu": - option.use_gpu(args.device_id) - - if backend == "trt" or backend == "paddle_trt": - assert device == "gpu", "the trt backend need device==gpu" - option.use_trt_backend() - if backend == "paddle_trt": - option.enable_paddle_to_trt() - if args.enable_trt_fp16: - option.enable_trt_fp16() - elif backend == "ov": - assert device == 
"cpu", "the openvino backend need device==cpu" - option.use_openvino_backend() - elif backend == "paddle": - option.use_paddle_backend() - elif backend == "ort": - option.use_ort_backend() + option.use_gpu() + if backend == "ort": + option.use_ort_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend in ["trt", "paddle_trt"]: + option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() + if enable_trt_fp16: + option.enable_trt_fp16() + elif backend == "default": + return option + else: + raise Exception( + "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, {} is not supported.". + format(backend)) + elif device == "cpu": + if backend == "ort": + option.use_ort_backend() + elif backend == "ov": + option.use_openvino_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend == "default": + return option + else: + raise Exception( + "While inference with CPU, only support default/ort/ov/paddle now, {} is not supported.". + format(backend)) else: - print("%s is an unsupported backend" % backend) + raise Exception( + "Only support device CPU/GPU now, {} is not supported.".format( + device)) return option @@ -125,6 +152,7 @@ def get_current_gputil(gpu_id): config_file = os.path.join(args.model, "infer_cfg.yml") gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info end2end_statis = list() cpu_mem = list() gpu_mem = list() @@ -169,7 +197,7 @@ def get_current_gputil(gpu_id): start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - if args.enable_collect_memory_info: + if enable_collect_memory_info: gpu_util.append(get_current_gputil(gpu_id)) cm, gm = get_current_memory_mb(gpu_id) cpu_mem.append(cm) @@ -179,7 +207,7 @@ def get_current_gputil(gpu_id): warmup_iter = args.iter_num // 5 end2end_statis_repeat = end2end_statis[warmup_iter:] - if args.enable_collect_memory_info: + if enable_collect_memory_info: cpu_mem_repeat = cpu_mem[warmup_iter:] gpu_mem_repeat = gpu_mem[warmup_iter:] gpu_util_repeat = gpu_util[warmup_iter:] @@ -187,14 +215,14 @@ def get_current_gputil(gpu_id): dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - if args.enable_collect_memory_info: + if enable_collect_memory_info: dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - if args.enable_collect_memory_info: + if enable_collect_memory_info: f.writelines("cpu_rss_mb: {} \n".format( str(dump_result["cpu_rss_mb"]))) f.writelines("gpu_rss_mb: {} \n".format( diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py index 81e0db797aa..2c7a37c2f14 100644 --- a/benchmark/benchmark_ppseg.py +++ b/benchmark/benchmark_ppseg.py @@ -22,9 +22,19 @@ import time +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() == 'true': + return True + elif v.lower() == 'false': + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def parse_arguments(): import argparse - import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleSeg model.") @@ -50,16 +60,16 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - 
default="ort", - help="inference backend, ort, ov, trt, paddle, paddle_trt.") + default="default", + help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=bool, + type=str2bool, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=bool, + type=str2bool, default=False, help="whether enable collect memory info") args = parser.parse_args() @@ -70,26 +80,43 @@ def build_option(args): option = fd.RuntimeOption() device = args.device backend = args.backend + enable_trt_fp16 = args.enable_trt_fp16 option.set_cpu_thread_num(args.cpu_num_thread) if device == "gpu": - option.use_gpu(args.device_id) - - if backend == "trt" or backend == "paddle_trt": - assert device == "gpu", "the trt backend need device==gpu" - option.use_trt_backend() - if backend == "paddle_trt": - option.enable_paddle_to_trt() - if args.enable_trt_fp16: - option.enable_trt_fp16() - elif backend == "ov": - assert device == "cpu", "the openvino backend need device==cpu" - option.use_openvino_backend() - elif backend == "paddle": - option.use_paddle_backend() - elif backend == "ort": - option.use_ort_backend() + option.use_gpu() + if backend == "ort": + option.use_ort_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend in ["trt", "paddle_trt"]: + option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() + if enable_trt_fp16: + option.enable_trt_fp16() + elif backend == "default": + return option + else: + raise Exception( + "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, {} is not supported.". + format(backend)) + elif device == "cpu": + if backend == "ort": + option.use_ort_backend() + elif backend == "ov": + option.use_openvino_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend == "default": + return option + else: + raise Exception( + "While inference with CPU, only support default/ort/ov/paddle now, {} is not supported.". 
+ format(backend)) else: - print("%s is an unsupported backend" % backend) + raise Exception( + "Only support device CPU/GPU now, {} is not supported.".format( + device)) return option @@ -123,6 +150,7 @@ def get_current_gputil(gpu_id): config_file = os.path.join(args.model, "deploy.yaml") gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info end2end_statis = list() cpu_mem = list() gpu_mem = list() @@ -148,7 +176,7 @@ def get_current_gputil(gpu_id): start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - if args.enable_collect_memory_info: + if enable_collect_memory_info: gpu_util.append(get_current_gputil(gpu_id)) cm, gm = get_current_memory_mb(gpu_id) cpu_mem.append(cm) @@ -158,7 +186,7 @@ def get_current_gputil(gpu_id): warmup_iter = args.iter_num // 5 end2end_statis_repeat = end2end_statis[warmup_iter:] - if args.enable_collect_memory_info: + if enable_collect_memory_info: cpu_mem_repeat = cpu_mem[warmup_iter:] gpu_mem_repeat = gpu_mem[warmup_iter:] gpu_util_repeat = gpu_util[warmup_iter:] @@ -166,14 +194,14 @@ def get_current_gputil(gpu_id): dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - if args.enable_collect_memory_info: + if enable_collect_memory_info: dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - if args.enable_collect_memory_info: + if enable_collect_memory_info: f.writelines("cpu_rss_mb: {} \n".format( str(dump_result["cpu_rss_mb"]))) f.writelines("gpu_rss_mb: {} \n".format( diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py index 65e89a516a1..f534c43f3e0 100644 --- a/benchmark/benchmark_yolo.py +++ b/benchmark/benchmark_yolo.py @@ -24,9 +24,19 @@ import time +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() == 'true': + return True + elif v.lower() == 'false': + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + def parse_arguments(): import argparse - import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of Yolo onnx model.") @@ -52,16 +62,16 @@ def parse_arguments(): parser.add_argument( "--backend", type=str, - default="ort", - help="inference backend, ort, ov, trt, paddle, paddle_trt.") + default="default", + help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=bool, + type=str2bool, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=bool, + type=str2bool, default=False, help="whether enable collect memory info") args = parser.parse_args() @@ -72,26 +82,43 @@ def build_option(args): option = fd.RuntimeOption() device = args.device backend = args.backend + enable_trt_fp16 = args.enable_trt_fp16 option.set_cpu_thread_num(args.cpu_num_thread) if device == "gpu": - option.use_gpu(args.device_id) - - if backend == "trt" or backend == "paddle_trt": - assert device == "gpu", "the trt backend need device==gpu" - option.use_trt_backend() - if backend == "paddle_trt": - option.enable_paddle_to_trt() - if args.enable_trt_fp16: - option.enable_trt_fp16() - elif backend == "ov": - assert device == "cpu", "the openvino 
backend need device==cpu" - option.use_openvino_backend() - elif backend == "paddle": - option.use_paddle_backend() - elif backend == "ort": - option.use_ort_backend() + option.use_gpu() + if backend == "ort": + option.use_ort_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend in ["trt", "paddle_trt"]: + option.use_trt_backend() + if backend == "paddle_trt": + option.enable_paddle_to_trt() + if enable_trt_fp16: + option.enable_trt_fp16() + elif backend == "default": + return option + else: + raise Exception( + "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, {} is not supported.". + format(backend)) + elif device == "cpu": + if backend == "ort": + option.use_ort_backend() + elif backend == "ov": + option.use_openvino_backend() + elif backend == "paddle": + option.use_paddle_backend() + elif backend == "default": + return option + else: + raise Exception( + "While inference with CPU, only support default/ort/ov/paddle now, {} is not supported.". + format(backend)) else: - print("%s is an unsupported backend" % backend) + raise Exception( + "Only support device CPU/GPU now, {} is not supported.".format( + device)) return option @@ -123,6 +150,7 @@ def get_current_gputil(gpu_id): model_file = args.model gpu_id = args.device_id + enable_collect_memory_info = args.enable_collect_memory_info end2end_statis = list() cpu_mem = list() gpu_mem = list() @@ -161,7 +189,7 @@ def get_current_gputil(gpu_id): start = time.time() result = model.predict(im) end2end_statis.append(time.time() - start) - if args.enable_collect_memory_info: + if enable_collect_memory_info: gpu_util.append(get_current_gputil(gpu_id)) cm, gm = get_current_memory_mb(gpu_id) cpu_mem.append(cm) @@ -171,7 +199,7 @@ def get_current_gputil(gpu_id): warmup_iter = args.iter_num // 5 end2end_statis_repeat = end2end_statis[warmup_iter:] - if args.enable_collect_memory_info: + if enable_collect_memory_info: cpu_mem_repeat = cpu_mem[warmup_iter:] gpu_mem_repeat = gpu_mem[warmup_iter:] gpu_util_repeat = gpu_util[warmup_iter:] @@ -179,14 +207,14 @@ def get_current_gputil(gpu_id): dump_result = dict() dump_result["runtime"] = runtime_statis["avg_time"] * 1000 dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000 - if args.enable_collect_memory_info: + if enable_collect_memory_info: dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat) dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat) dump_result["gpu_util"] = np.mean(gpu_util_repeat) f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"]))) f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"]))) - if args.enable_collect_memory_info: + if enable_collect_memory_info: f.writelines("cpu_rss_mb: {} \n".format( str(dump_result["cpu_rss_mb"]))) f.writelines("gpu_rss_mb: {} \n".format( From 20ddf39303b51c786463d749e11c9e03f7a6550a Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 3 Nov 2022 03:05:05 +0000 Subject: [PATCH 03/50] update benchmark --- benchmark/benchmark_ppcls.py | 16 +++------------- benchmark/benchmark_ppdet.py | 16 +++------------- benchmark/benchmark_ppseg.py | 16 +++------------- benchmark/benchmark_yolo.py | 16 +++------------- 4 files changed, 12 insertions(+), 52 deletions(-) diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py index 914ace71b03..039a07cc9e1 100755 --- a/benchmark/benchmark_ppcls.py +++ b/benchmark/benchmark_ppcls.py @@ -22,19 +22,9 @@ import time -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() == 'true': - return True - 
elif v.lower() == 'false': - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - def parse_arguments(): import argparse + import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleClas model.") @@ -64,12 +54,12 @@ def parse_arguments(): help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable collect memory info") args = parser.parse_args() diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index cb8d47f44ea..6cabc4d4e99 100755 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -24,19 +24,9 @@ import time -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() == 'true': - return True - elif v.lower() == 'false': - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - def parse_arguments(): import argparse + import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleDetection model.") @@ -66,12 +56,12 @@ def parse_arguments(): help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable collect memory info") args = parser.parse_args() diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py index 2c7a37c2f14..ef57e371502 100755 --- a/benchmark/benchmark_ppseg.py +++ b/benchmark/benchmark_ppseg.py @@ -22,19 +22,9 @@ import time -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() == 'true': - return True - elif v.lower() == 'false': - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - def parse_arguments(): import argparse + import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of PaddleSeg model.") @@ -64,12 +54,12 @@ def parse_arguments(): help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable collect memory info") args = parser.parse_args() diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py index f534c43f3e0..aa6927c8337 100755 --- a/benchmark/benchmark_yolo.py +++ b/benchmark/benchmark_yolo.py @@ -24,19 +24,9 @@ import time -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() == 'true': - return True - elif v.lower() == 'false': - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - def parse_arguments(): import argparse + import ast parser = argparse.ArgumentParser() parser.add_argument( "--model", required=True, help="Path of Yolo onnx model.") @@ -66,12 +56,12 @@ def parse_arguments(): help="inference backend, default, ort, ov, trt, paddle, paddle_trt.") parser.add_argument( "--enable_trt_fp16", - type=str2bool, + 
type=ast.literal_eval, default=False, help="whether enable fp16 in trt backend") parser.add_argument( "--enable_collect_memory_info", - type=str2bool, + type=ast.literal_eval, default=False, help="whether enable collect memory info") args = parser.parse_args() From 2ce400549d42898430d234b2b8592a88a4aa526c Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Fri, 4 Nov 2022 03:50:23 +0000 Subject: [PATCH 04/50] update result doc --- docs/api_docs/python/vision_results_cn.md | 17 +++++++++++++++++ docs/api_docs/python/vision_results_en.md | 20 ++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/docs/api_docs/python/vision_results_cn.md b/docs/api_docs/python/vision_results_cn.md index dab22e6a59c..19b2a6662da 100644 --- a/docs/api_docs/python/vision_results_cn.md +++ b/docs/api_docs/python/vision_results_cn.md @@ -16,6 +16,7 @@ API:`fastdeploy.vision.SegmentationResult`, 该结果返回: - **score_map**(list of float): 成员变量,与label_map一一对应的所预测的分割类别概率值(当导出模型时指定`--output_op argmax`)或者经过softmax归一化化后的概率值(当导出模型时指定`--output_op softmax`或者导出模型时指定`--output_op none`同时模型初始化的时候设置模型类成员属性`apply_softmax=true`). - **shape**(list of int): 成员变量,表示输出图片的尺寸,为`H*W`. + ## DetectionResult DetectionResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像检测出来的目标框、目标类别和目标置信度. @@ -40,6 +41,7 @@ API:`fastdeploy.vision.FaceDetectionResult` , 该结果返回: - **landmarks**(list of list(float)): 成员变量,表示单张图片检测出来的所有人脸的关键点. - **landmarks_per_face**(int): 成员变量,表示每个人脸框中的关键点的数量. + ## KeyPointDetectionResult KeyPointDetectionResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像中目标行为的各个关键点坐标和置信度。 @@ -70,6 +72,7 @@ API:`fastdeploy.vision.MattingResult`, 该结果返回: - **contain_foreground**(bool): 表示预测的结果是否包含前景. - **shape**(list of int): 表示输出结果的shape,当`contain_foreground`为`false`,shape只包含`(H,W)`,当`contain_foreground`为`true`,shape包含`(H,W,C)`, C一般为3. + ## OCRResult OCRResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像检测和识别出来的文本框,文本框方向分类,以及文本框内的文本内容. @@ -79,3 +82,17 @@ API:`fastdeploy.vision.OCRResult`, 该结果返回: - **rec_scores**(list of float): 成员变量,表示文本框内识别出来的文本的置信度,其元素个数与`boxes.size()`一致. - **cls_scores**(list of float): 成员变量,表示文本框的分类结果的置信度,其元素个数与`boxes.size()`一致. - **cls_labels**(list of int): 成员变量,表示文本框的方向分类类别,其元素个数与`boxes.size()`一致. + + +## FaceAlignmentResult +FaceAlignmentResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明人脸landmarks。 + +API:`fastdeploy.vision.FaceAlignmentResult`, 该结果返回: +- **landmarks**(list of list(float)): 成员变量,表示单张人脸图片检测出来的所有关键点 + + +## HeadPoseResult +HeadPoseResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明头部姿态结果。 + +API:`fastdeploy.vision.HeadPoseResult`, 该结果返回: +- **euler_angles**(list of float): 成员变量,表示单张人脸图片预测的欧拉角,存放的顺序是(yaw, pitch, roll), yaw 代表水平转角,pitch 代表垂直角,roll 代表翻滚角,值域都为 [-90, +90]度 diff --git a/docs/api_docs/python/vision_results_en.md b/docs/api_docs/python/vision_results_en.md index 513a011d7a3..cbf4e2d5ae1 100644 --- a/docs/api_docs/python/vision_results_en.md +++ b/docs/api_docs/python/vision_results_en.md @@ -10,6 +10,7 @@ API: `fastdeploy.vision.ClassifyResult`, The ClassifyResult will return: - **scores**(list of float):Member variables that indicate the confidence level of a single image on the corresponding classification result, the number of which is determined by the `topk` passed in when using the classification model, e.g. the confidence level of a Top 5 classification can be returned. 
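A minimal sketch of how these fields are typically read, assuming a hypothetical PaddleClas-format model directory and test image (the paths below are placeholders, not files shipped with FastDeploy):

```python
import cv2
import fastdeploy as fd

# Hypothetical PaddleClas-format model directory and test image.
model = fd.vision.classification.PaddleClasModel(
    "mobilenetv1/inference.pdmodel", "mobilenetv1/inference.pdiparams",
    "mobilenetv1/inference_cls.yaml")
result = model.predict(cv2.imread("test.jpeg"), topk=5)  # ClassifyResult
print(result.label_ids)  # top-5 class ids, highest confidence first
print(result.scores)     # confidence for each id above
```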
+ ## SegmentationResult The code of SegmentationResult is defined in `fastdeploy/vision/common/result.h` and is used to indicate the segmentation category predicted for each pixel in the image and the probability of the segmentation category. @@ -33,6 +34,7 @@ API: `fastdeploy.vision.Mask`, The Mask will return: - **data**:Member variable indicating a detected mask. - **shape**:Member variable representing the shape of the mask, e.g. `(H,W)`. + ## FaceDetectionResult The FaceDetectionResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the target frames detected by face detection, face landmarks, target confidence and the number of landmarks per face. @@ -42,6 +44,7 @@ API: `fastdeploy.vision.FaceDetectionResult`, The FaceDetectionResult will retur - **landmarks**(list of list(float)): Member variables that represent the key points of all faces detected by a single image. - **landmarks_per_face**(int):Member variable indicating the number of key points in each face frame. + ## KeyPointDetectionResult The KeyPointDetectionResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the coordinates and confidence of each keypoint of the target behavior in the image. @@ -55,12 +58,14 @@ API:`fastdeploy.vision.KeyPointDetectionResult`, The KeyPointDetectionResult wil - `J`: num_joints(number of keypoints for a target) - **num_joints**(int): Member variable, representing the number of keypoints for a target + ## FaceRecognitionResult The FaceRecognitionResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the embedding of the image features by the face recognition model. API: `fastdeploy.vision.FaceRecognitionResult`, The FaceRecognitionResult will return: - **landmarks_per_face**(list of float):Member variables, which indicate the final extracted features embedding of the face recognition model, can be used to calculate the feature similarity between faces. + ## MattingResult The MattingResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the value of alpha transparency predicted by the model, the predicted outlook, etc. @@ -70,6 +75,7 @@ API:`fastdeploy.vision.MattingResult`, The MattingResult will return: - **contain_foreground**(bool):Indicates whether the predicted outcome includes the foreground. - **shape**(list of int): When `contain_foreground` is false, the shape only contains `(H,W)`, when `contain_foreground` is `true,` the shape contains `(H,W,C)`, C is generally 3. + ## OCRResult The OCRResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the text box detected in the image, the text box orientation classification, and the text content recognized inside the text box. @@ -79,3 +85,17 @@ API:`fastdeploy.vision.OCRResult`, The OCRResult will return: - **rec_scores**(list of float):Member variable indicating the confidence level of the text identified in the box, the number of elements is the same as `boxes.size()`. - **cls_scores**(list of float):Member variable indicating the confidence level of the classification result of the text box, with the same number of elements as `boxes.size()`. - **cls_labels**(list of int):Member variable indicating the orientation category of the text box, the number of elements is the same as `boxes.size()`. + + +## FaceAlignmentResult +The code of FaceAlignmentResult is defined in `fastdeploy/vision/common/result.h` and is used to indicate the key points of the face. 
+
+API: `fastdeploy.vision.FaceAlignmentResult`, The FaceAlignmentResult will return:
+- **landmarks**(list of list(float)):Member variables that represent all the key points detected from a single face image.
+
+
+## HeadPoseResult
+The code of HeadPoseResult is defined in `fastdeploy/vision/common/result.h` and is used to indicate the head pose result.
+
+API: `fastdeploy.vision.HeadPoseResult`, The HeadPoseResult will return:
+- **euler_angles**(list of float):Member variables that represent the Euler angles predicted from a single face image, stored in the order (yaw, pitch, roll). Yaw is the horizontal rotation angle, pitch is the vertical rotation angle, roll is the in-plane rotation angle, and each angle lies in the range [-90, +90] degrees.
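A minimal sketch of reading both result types, assuming hypothetical PFLD and FSANet ONNX files exported for FastDeploy (the file names below are placeholders):

```python
import cv2
import fastdeploy as fd

im = cv2.imread("face.jpg")  # a cropped face image

# FaceAlignmentResult: landmarks is a list of [x, y] keypoints.
align_model = fd.vision.facealign.PFLD("pfld-106-lite.onnx")
face = align_model.predict(im)
print(len(face.landmarks))  # e.g. 106 points for a PFLD-106 model

# HeadPoseResult: euler_angles is stored as (yaw, pitch, roll), in degrees.
pose_model = fd.vision.headpose.FSANet("fsanet-var.onnx")
pose = pose_model.predict(im)
yaw, pitch, roll = pose.euler_angles
```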
From 36889ff829e5540fe1fa5f21387905a337255c32 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Fri, 4 Nov 2022 07:06:07 +0000
Subject: [PATCH 05/50] fixed for CI

---
 examples/CMakeLists.txt                            | 2 +-
 examples/vision/facealign/pfld/cpp/CMakeLists.txt  | 2 +-
 examples/vision/headpose/fsanet/cpp/CMakeLists.txt | 2 +-
 examples/vision/headpose/fsanet/cpp/infer.cc       | 6 +++---
 fastdeploy/vision/matting/contrib/rvm.cc           | 3 ++-
 5 files changed, 8 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 examples/vision/headpose/fsanet/cpp/CMakeLists.txt
 mode change 100644 => 100755 fastdeploy/vision/matting/contrib/rvm.cc

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 7118460ea6e..8aa469b6a5b 100755
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -49,7 +49,7 @@ function(add_fastdeploy_executable FIELD CC_FILE)
   add_executable(${TEMP_TARGET_NAME} ${TEMP_TARGET_FILE})
   target_link_libraries(${TEMP_TARGET_NAME} PUBLIC fastdeploy)
   if(TARGET gflags)
-    if(UNIX)
+    if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
       target_link_libraries(${TEMP_TARGET_NAME} PRIVATE gflags pthread)
     else()
       target_link_libraries(${TEMP_TARGET_NAME} PRIVATE gflags)
diff --git a/examples/vision/facealign/pfld/cpp/CMakeLists.txt b/examples/vision/facealign/pfld/cpp/CMakeLists.txt
index be329f69ac0..c417fcb3880 100755
--- a/examples/vision/facealign/pfld/cpp/CMakeLists.txt
+++ b/examples/vision/facealign/pfld/cpp/CMakeLists.txt
@@ -11,7 +11,7 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
 # 添加FastDeploy库依赖
-if(UNIX)
+if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags)
diff --git a/examples/vision/headpose/fsanet/cpp/CMakeLists.txt b/examples/vision/headpose/fsanet/cpp/CMakeLists.txt
old mode 100644
new mode 100755
index be329f69ac0..c417fcb3880
--- a/examples/vision/headpose/fsanet/cpp/CMakeLists.txt
+++ b/examples/vision/headpose/fsanet/cpp/CMakeLists.txt
@@ -11,7 +11,7 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
 # 添加FastDeploy库依赖
-if(UNIX)
+if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags)
diff --git a/examples/vision/headpose/fsanet/cpp/infer.cc b/examples/vision/headpose/fsanet/cpp/infer.cc
index 332f492606b..522ec3d9540 100644
--- a/examples/vision/headpose/fsanet/cpp/infer.cc
+++ b/examples/vision/headpose/fsanet/cpp/infer.cc
@@ -44,7 +44,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     } else if (FLAGS_backend == "trt" ||
                FLAGS_backend == "paddle_trt") {
       option->UseTrtBackend();
-      option.SetTrtInputShape("images", {1, 3, 64, 64});
+      option->SetTrtInputShape("images", {1, 3, 64, 64});
       if (FLAGS_backend == "paddle_trt") {
         option->EnablePaddleToTrt();
       }
@@ -54,7 +54,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     } else if (FLAGS_backend == "default") {
       return true;
     } else {
-      std::cout << "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, " << FLAG_backend << " is not supported." << std::endl;
+      std::cout << "While inference with GPU, only support default/ort/paddle/trt/paddle_trt now, " << FLAGS_backend << " is not supported." << std::endl;
       return false;
     }
   } else if (FLAGS_device == "cpu") {
@@ -67,7 +67,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     } else if (FLAGS_backend == "default") {
       return true;
     } else {
-      std::cout << "While inference with CPU, only support default/ort/ov/paddle now, " << FLAG_backend << " is not supported." << std::endl;
+      std::cout << "While inference with CPU, only support default/ort/ov/paddle now, " << FLAGS_backend << " is not supported." << std::endl;
       return false;
     }
   } else {
diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc
old mode 100644
new mode 100755
index ec8ed19fc47..6f48a38652a
--- a/fastdeploy/vision/matting/contrib/rvm.cc
+++ b/fastdeploy/vision/matting/contrib/rvm.cc
@@ -138,7 +138,8 @@ bool RobustVideoMatting::Postprocess(
   result->Clear();
   result->contain_foreground = true;
-  result->shape = {static_cast<int64_t>(in_h), static_cast<int64_t>(in_w)};
+  // if contain_foreground == true, shape must be set to (h, w, c)
+  result->shape = {static_cast<int64_t>(in_h), static_cast<int64_t>(in_w), 3};
   int numel = in_h * in_w;
   int nbytes = numel * sizeof(float);
   result->Resize(numel);

From f32a09ad2b82732c9900772ae66e469b82149178 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Mon, 7 Nov 2022 06:19:42 +0000
Subject: [PATCH 06/50] update python api_docs

---
 docs/api_docs/python/face_alignment.md | 9 +++++++++
 docs/api_docs/python/headpose.md       | 9 +++++++++
 docs/api_docs/python/matting.md        | 8 ++++++++
 3 files changed, 26 insertions(+)
 create mode 100644 docs/api_docs/python/face_alignment.md
 create mode 100644 docs/api_docs/python/headpose.md

diff --git a/docs/api_docs/python/face_alignment.md b/docs/api_docs/python/face_alignment.md
new file mode 100644
index 00000000000..f0369b55af6
--- /dev/null
+++ b/docs/api_docs/python/face_alignment.md
@@ -0,0 +1,9 @@
+# Face Alignment API
+
+## fastdeploy.vision.facealign.PFLD
+
+```{eval-rst}
+.. autoclass:: fastdeploy.vision.facealign.PFLD
+    :members:
+    :inherited-members:
+```
diff --git a/docs/api_docs/python/headpose.md b/docs/api_docs/python/headpose.md
new file mode 100644
index 00000000000..d1fba74f927
--- /dev/null
+++ b/docs/api_docs/python/headpose.md
@@ -0,0 +1,9 @@
+# Headpose API
+
+## fastdeploy.vision.headpose.FSANet
+
+```{eval-rst}
+.. autoclass:: fastdeploy.vision.headpose.FSANet
+    :members:
+    :inherited-members:
+```
diff --git a/docs/api_docs/python/matting.md b/docs/api_docs/python/matting.md
index 7c121110acb..5e9c2a22732 100644
--- a/docs/api_docs/python/matting.md
+++ b/docs/api_docs/python/matting.md
@@ -15,3 +15,11 @@
     :members:
     :inherited-members:
 ```
+
+## fastdeploy.vision.matting.RobustVideoMatting
+
+```{eval-rst}
+.. 
autoclass:: fastdeploy.vision.matting.RobustVideoMatting + :members: + :inherited-members: +``` From 7fa4dea9ee03c1058c09f81251b37dc5b8673bee Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 7 Nov 2022 06:26:57 +0000 Subject: [PATCH 07/50] update index.rst --- docs/api_docs/python/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/api_docs/python/index.rst b/docs/api_docs/python/index.rst index 06d4a95cbbe..60eea324e7f 100644 --- a/docs/api_docs/python/index.rst +++ b/docs/api_docs/python/index.rst @@ -20,4 +20,6 @@ FastDeploy matting.md face_recognition.md face_detection.md + face_alignment.md + headpose.md vision_results_en.md From 438906024077cfd30c8ecbf2509ef6bbd7facdd5 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 7 Nov 2022 09:15:03 +0000 Subject: [PATCH 08/50] add runtime cpp examples --- docs/api_docs/python/index.rst | 2 + docs/api_docs/python/runtime.md | 9 +++ docs/api_docs/python/runtime_option.md | 9 +++ examples/runtime/cpp/CMakeLists.txt | 14 ++++ examples/runtime/cpp/infer_onnx_openvino.cc | 59 +++++++++++++++++ examples/runtime/cpp/infer_onnx_tensorrt.cc | 60 +++++++++++++++++ .../runtime/cpp/infer_paddle_onnxruntime.cc | 60 +++++++++++++++++ examples/runtime/cpp/infer_paddle_openvino.cc | 60 +++++++++++++++++ .../cpp/infer_paddle_paddle_inference.cc | 65 +++++++++++++++++++ examples/runtime/cpp/infer_paddle_tensorrt.cc | 61 +++++++++++++++++ .../runtime/python/infer_paddle_tensorrt.py | 1 + 11 files changed, 400 insertions(+) create mode 100644 docs/api_docs/python/runtime.md create mode 100644 docs/api_docs/python/runtime_option.md create mode 100644 examples/runtime/cpp/CMakeLists.txt create mode 100644 examples/runtime/cpp/infer_onnx_openvino.cc create mode 100644 examples/runtime/cpp/infer_onnx_tensorrt.cc create mode 100644 examples/runtime/cpp/infer_paddle_onnxruntime.cc create mode 100644 examples/runtime/cpp/infer_paddle_openvino.cc create mode 100644 examples/runtime/cpp/infer_paddle_paddle_inference.cc create mode 100644 examples/runtime/cpp/infer_paddle_tensorrt.cc diff --git a/docs/api_docs/python/index.rst b/docs/api_docs/python/index.rst index 60eea324e7f..69b65b3b101 100644 --- a/docs/api_docs/python/index.rst +++ b/docs/api_docs/python/index.rst @@ -23,3 +23,5 @@ FastDeploy face_alignment.md headpose.md vision_results_en.md + runtime.md + runtime_option.md diff --git a/docs/api_docs/python/runtime.md b/docs/api_docs/python/runtime.md new file mode 100644 index 00000000000..4a519ee7ee1 --- /dev/null +++ b/docs/api_docs/python/runtime.md @@ -0,0 +1,9 @@ +# Runtime API + +## fastdeploy.Runtime + +```{eval-rst} +.. autoclass:: fastdeploy.Runtime + :members: + :inherited-members: +``` diff --git a/docs/api_docs/python/runtime_option.md b/docs/api_docs/python/runtime_option.md new file mode 100644 index 00000000000..96eff8672d4 --- /dev/null +++ b/docs/api_docs/python/runtime_option.md @@ -0,0 +1,9 @@ +# Runtime Option API + +## fastdeploy.RuntimeOption + +```{eval-rst} +.. 
autoclass:: fastdeploy.RuntimeOption + :members: + :inherited-members: +``` diff --git a/examples/runtime/cpp/CMakeLists.txt b/examples/runtime/cpp/CMakeLists.txt new file mode 100644 index 00000000000..09ea45c3b89 --- /dev/null +++ b/examples/runtime/cpp/CMakeLists.txt @@ -0,0 +1,14 @@ +PROJECT(runtime_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.12) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc) +# 添加FastDeploy库依赖 +target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/runtime/cpp/infer_onnx_openvino.cc b/examples/runtime/cpp/infer_onnx_openvino.cc new file mode 100644 index 00000000000..4588ec2de6b --- /dev/null +++ b/examples/runtime/cpp/infer_onnx_openvino.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2.onnx"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); + runtime_option.UseOpenVINOBackend(); + runtime_option.SetCpuThreadNum(12); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_onnx_tensorrt.cc b/examples/runtime/cpp/infer_onnx_tensorrt.cc new file mode 100644 index 00000000000..26858db6631 --- /dev/null +++ b/examples/runtime/cpp/infer_onnx_tensorrt.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2.onnx"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); + runtime_option.UseGpu(0); + runtime_option.UseTrtBackend(); + runtime_option.SetTrtInputShape("inputs", {1, 3, 224, 224}); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_paddle_onnxruntime.cc b/examples/runtime/cpp/infer_paddle_onnxruntime.cc new file mode 100644 index 00000000000..d9ed0819c1c --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_onnxruntime.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.UseOrtBackend(); + runtime_option.SetCpuThreadNum(12); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! 
" + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_paddle_openvino.cc b/examples/runtime/cpp/infer_paddle_openvino.cc new file mode 100644 index 00000000000..3862437d049 --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_openvino.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.UseOpenVINOBackend(); + runtime_option.SetCpuThreadNum(12); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_paddle_paddle_inference.cc b/examples/runtime/cpp/infer_paddle_paddle_inference.cc new file mode 100644 index 00000000000..ac34f9bf8e3 --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_paddle_inference.cc @@ -0,0 +1,65 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + // CPU + runtime_option.UsePaddleBackend(); + runtime_option.SetCpuThreadNum(12); + // GPU + // runtime_option.UseGpu(0); + // IPU + // runtime_option.UseIpu(); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/cpp/infer_paddle_tensorrt.cc b/examples/runtime/cpp/infer_paddle_tensorrt.cc new file mode 100644 index 00000000000..7189ee87d0d --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_tensorrt.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.UseGpu(0); + runtime_option.UseTrtBackend(); + runtime_option.EnablePaddleToTrt(); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! 
" + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.reserve(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/runtime/python/infer_paddle_tensorrt.py b/examples/runtime/python/infer_paddle_tensorrt.py index ad2b8e1976a..8388656c79f 100644 --- a/examples/runtime/python/infer_paddle_tensorrt.py +++ b/examples/runtime/python/infer_paddle_tensorrt.py @@ -27,6 +27,7 @@ # **** GPU 配置 *** option.use_gpu(0) option.use_trt_backend() +option.enable_paddle_to_trt() # 初始化构造runtime runtime = fd.Runtime(option) From 84d564fa7c2463c6641cd2e09288605be0f652b7 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 7 Nov 2022 11:03:17 +0000 Subject: [PATCH 09/50] deal with comments --- examples/runtime/cpp/infer_onnx_openvino.cc | 2 +- examples/runtime/cpp/infer_onnx_tensorrt.cc | 2 +- examples/runtime/cpp/infer_paddle_onnxruntime.cc | 2 +- examples/runtime/cpp/infer_paddle_openvino.cc | 2 +- examples/runtime/cpp/infer_paddle_paddle_inference.cc | 2 +- examples/runtime/cpp/infer_paddle_tensorrt.cc | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/runtime/cpp/infer_onnx_openvino.cc b/examples/runtime/cpp/infer_onnx_openvino.cc index 4588ec2de6b..c2f270be9f3 100644 --- a/examples/runtime/cpp/infer_onnx_openvino.cc +++ b/examples/runtime/cpp/infer_onnx_openvino.cc @@ -43,7 +43,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_onnx_tensorrt.cc b/examples/runtime/cpp/infer_onnx_tensorrt.cc index 26858db6631..084c1dfae6d 100644 --- a/examples/runtime/cpp/infer_onnx_tensorrt.cc +++ b/examples/runtime/cpp/infer_onnx_tensorrt.cc @@ -44,7 +44,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_paddle_onnxruntime.cc b/examples/runtime/cpp/infer_paddle_onnxruntime.cc index d9ed0819c1c..d8d036a0346 100644 --- a/examples/runtime/cpp/infer_paddle_onnxruntime.cc +++ b/examples/runtime/cpp/infer_paddle_onnxruntime.cc @@ -44,7 +44,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_paddle_openvino.cc b/examples/runtime/cpp/infer_paddle_openvino.cc index 3862437d049..3958cdcf0e6 100644 --- 
a/examples/runtime/cpp/infer_paddle_openvino.cc +++ b/examples/runtime/cpp/infer_paddle_openvino.cc @@ -44,7 +44,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_paddle_paddle_inference.cc b/examples/runtime/cpp/infer_paddle_paddle_inference.cc index ac34f9bf8e3..1d0bd82ad2f 100644 --- a/examples/runtime/cpp/infer_paddle_paddle_inference.cc +++ b/examples/runtime/cpp/infer_paddle_paddle_inference.cc @@ -49,7 +49,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } diff --git a/examples/runtime/cpp/infer_paddle_tensorrt.cc b/examples/runtime/cpp/infer_paddle_tensorrt.cc index 7189ee87d0d..04fe311b2c7 100644 --- a/examples/runtime/cpp/infer_paddle_tensorrt.cc +++ b/examples/runtime/cpp/infer_paddle_tensorrt.cc @@ -45,7 +45,7 @@ int main(int argc, char* argv[]) { std::vector output_tensors(1); std::vector inputs_data; - inputs_data.reserve(1 * 3 * 224 * 224); + inputs_data.resize(1 * 3 * 224 * 224); for (size_t i = 0; i < inputs_data.size(); ++i) { inputs_data[i] = std::rand() % 1000 / 1000.0f; } From e6f4e63acbcc51c53b4f29858d09dc045f2f56d2 Mon Sep 17 00:00:00 2001 From: Jason <928090362@qq.com> Date: Mon, 7 Nov 2022 19:48:58 +0800 Subject: [PATCH 10/50] Update infer_paddle_tensorrt.py --- examples/runtime/python/infer_paddle_tensorrt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/runtime/python/infer_paddle_tensorrt.py b/examples/runtime/python/infer_paddle_tensorrt.py index 8388656c79f..94c95cb872d 100644 --- a/examples/runtime/python/infer_paddle_tensorrt.py +++ b/examples/runtime/python/infer_paddle_tensorrt.py @@ -27,7 +27,8 @@ # **** GPU 配置 *** option.use_gpu(0) option.use_trt_backend() -option.enable_paddle_to_trt() +# using TensorRT integrated in Paddle Inference +# option.enable_paddle_to_trt() # 初始化构造runtime runtime = fd.Runtime(option) From 1952c99bfed66e0aa29e02c4f9f27f9024021985 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Tue, 8 Nov 2022 07:21:16 +0000 Subject: [PATCH 11/50] Add runtime quick start --- docs/cn/quick_start/runtime/cpp.md | 116 ++++++++++++++++++++++++++ docs/cn/quick_start/runtime/python.md | 51 +++++++++++ 2 files changed, 167 insertions(+) diff --git a/docs/cn/quick_start/runtime/cpp.md b/docs/cn/quick_start/runtime/cpp.md index 7d52d9b58be..a4ed9485092 100644 --- a/docs/cn/quick_start/runtime/cpp.md +++ b/docs/cn/quick_start/runtime/cpp.md @@ -1 +1,117 @@ # C++推理 + +确认开发环境已准备FastDeploy C++部署库,参考[FastDeploy安装](../../build_and_install/)安装预编译的FastDeploy,或根据自己需求进行编译安装。 + +本文档以 PaddleClas 分类模型 MobileNetV2 为例展示CPU上的推理示例 + +## 1. 获取模型 + +```bash +wget https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz +tar xvf mobilenetv2.tgz +``` + +## 2. 
配置后端
+
+如下C++代码保存为`infer_paddle_onnxruntime.cc`
+
+``` c++
+#include "fastdeploy/runtime.h"
+
+namespace fd = fastdeploy;
+
+int main(int argc, char* argv[]) {
+  std::string model_file = "mobilenetv2/inference.pdmodel";
+  std::string params_file = "mobilenetv2/inference.pdiparams";
+
+  // setup option
+  fd::RuntimeOption runtime_option;
+  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
+  runtime_option.UseOrtBackend();
+  runtime_option.SetCpuThreadNum(12);
+  // init runtime
+  std::unique_ptr<fd::Runtime> runtime =
+      std::unique_ptr<fd::Runtime>(new fd::Runtime());
+  if (!runtime->Init(runtime_option)) {
+    std::cerr << "--- Init FastDeploy Runtime Failed! "
+              << "\n--- Model: " << model_file << std::endl;
+    return -1;
+  } else {
+    std::cout << "--- Init FastDeploy Runtime Done! "
+              << "\n--- Model: " << model_file << std::endl;
+  }
+  // init input tensor shape
+  fd::TensorInfo info = runtime->GetInputInfo(0);
+  info.shape = {1, 3, 224, 224};
+
+  std::vector<fd::FDTensor> input_tensors(1);
+  std::vector<fd::FDTensor> output_tensors(1);
+
+  std::vector<float> inputs_data;
+  inputs_data.resize(1 * 3 * 224 * 224);
+  for (size_t i = 0; i < inputs_data.size(); ++i) {
+    inputs_data[i] = std::rand() % 1000 / 1000.0f;
+  }
+  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
+
+  //get input name
+  input_tensors[0].name = info.name;
+
+  runtime->Infer(input_tensors, &output_tensors);
+
+  output_tensors[0].PrintInfo();
+  return 0;
+}
+```
+加载完成,会输出提示如下,说明初始化的后端,以及运行的硬件设备
+```
+[INFO] fastdeploy/fastdeploy_runtime.cc(283)::Init	Runtime initialized with Backend::OrtBackend in device Device::CPU.
+```
+
+## 3. 准备CMakeLists.txt
+
+FastDeploy中包含多个依赖库,直接采用`g++`或编译器编译较为繁杂,推荐使用cmake进行编译配置。示例配置如下,
+
+```cmake
+PROJECT(runtime_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.12)
+
+# 指定下载解压后的fastdeploy库路径
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# 添加FastDeploy依赖头文件
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_paddle_onnxruntime.cc)
+# 添加FastDeploy库依赖
+target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS})
+```
+
+## 4. 编译可执行程序
+
+打开命令行终端,进入`infer_paddle_onnxruntime.cc`和`CMakeLists.txt`所在的目录,执行如下命令
+
+```bash
+cd examples/runtime/cpp
+mkdir build && cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=$fastdeploy_cpp_sdk
+make -j
+```
+
+```fastdeploy_cpp_sdk``` 为FastDeploy C++部署库路径
+
+编译完成后,使用如下命令执行可得到预测结果
+```bash
+./runtime_demo
+```
+执行时如提示`error while loading shared libraries: libxxx.so: cannot open shared object file: No such file...`,说明程序执行时没有找到FastDeploy的库路径,可通过执行如下命令,将FastDeploy的库路径添加到环境变量之后,重新执行二进制程序。
+```bash
+source /Path/to/fastdeploy_cpp_sdk/fastdeploy_init.sh
+```
+
+## 其它文档
+
+- [不同后端Runtime demo示例](../../../../examples/runtime/README.md)
+- [切换模型推理的硬件和后端](../../faq/how_to_change_backend.md)
diff --git a/docs/cn/quick_start/runtime/python.md b/docs/cn/quick_start/runtime/python.md
index cb2c6efd227..23e78956fb2 100644
--- a/docs/cn/quick_start/runtime/python.md
+++ b/docs/cn/quick_start/runtime/python.md
@@ -1 +1,52 @@
 # Python推理
+
+确认开发环境已安装FastDeploy,参考[FastDeploy安装](../../build_and_install/)安装预编译的FastDeploy,或根据自己需求进行编译安装。
+
+本文档以 PaddleClas 分类模型 MobileNetV2 为例展示CPU上的推理示例
+
+## 1. 获取模型
+
+``` python
+import fastdeploy as fd
+
+model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz"
+fd.download_and_decompress(model_url, path=".")
+```
+
+## 2. 配置后端
+
+- 更多后端的示例可参考[examples/runtime](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/runtime)
+
+``` python
+import numpy as np
+
+option = fd.RuntimeOption()
+
+option.set_model_path("mobilenetv2/inference.pdmodel",
+                      "mobilenetv2/inference.pdiparams")
+
+# **** CPU 配置 ****
+option.use_cpu()
+option.use_ort_backend()
+option.set_cpu_thread_num(12)
+
+# 初始化构造runtime
+runtime = fd.Runtime(option)
+
+# 获取模型输入名
+input_name = runtime.get_input_info(0).name
+
+# 构造随机数据进行推理
+results = runtime.infer({
+    input_name: np.random.rand(1, 3, 224, 224).astype("float32")
+})
+
+print(results[0].shape)
+```
+加载完成,会输出提示如下,说明初始化的后端,以及运行的硬件设备
+```
+[INFO] fastdeploy/fastdeploy_runtime.cc(283)::Init	Runtime initialized with Backend::OrtBackend in device Device::CPU.
+```
+
+## 其它文档
+
+- [不同后端Runtime demo示例](../../../../examples/runtime/README.md)
+- [切换模型推理的硬件和后端](../../faq/how_to_change_backend.md)

From d495b8e8081824e5606e1d255b80192abb2b4165 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Tue, 8 Nov 2022 11:09:11 +0000
Subject: [PATCH 12/50] deal with comments

---
 docs/cn/quick_start/runtime/cpp.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/cn/quick_start/runtime/cpp.md b/docs/cn/quick_start/runtime/cpp.md
index a4ed9485092..5fe86c7b6a5 100644
--- a/docs/cn/quick_start/runtime/cpp.md
+++ b/docs/cn/quick_start/runtime/cpp.md
@@ -111,6 +111,8 @@ make -j
 source /Path/to/fastdeploy_cpp_sdk/fastdeploy_init.sh
 ```
 
+本示例代码在各平台(Windows/Linux/Mac)上通用,但编译过程仅支持(Linux/Mac),Windows上使用msbuild进行编译,具体使用方式参考[Windows平台使用FastDeploy C++ SDK](../../faq/use_sdk_on_windows.md)
+
 ## 其它文档
 
 - [不同后端Runtime demo示例](../../../../examples/runtime/README.md)

From 731b8220650eb47d40d5b87d5c26d7bc57a6f83e Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Tue, 8 Nov 2022 12:47:09 +0000
Subject: [PATCH 13/50] fixed reused_input_tensors&&reused_output_tensors

---
 examples/runtime/README.md                         |  14 ++
 examples/runtime/cpp/README.md                     | 121 ++++++++++++++++++
 examples/runtime/python/README.md                  |  53 ++++++++
 .../vision/classification/yolov5cls/README.md      |   2 -
 fastdeploy/fastdeploy_model.cc                     |   2 +-
 fastdeploy/fastdeploy_model.h                      |  19 ++-
 .../vision/classification/ppcls/model.cc           |   8 +-
 .../vision/detection/contrib/scaledyolov4.cc       |   8 +-
 fastdeploy/vision/detection/contrib/yolor.cc       |   8 +-
 fastdeploy/vision/detection/contrib/yolov5.cc      |  10 +-
 .../vision/detection/contrib/yolov5lite.cc         |  12 +-
 fastdeploy/vision/detection/contrib/yolov6.cc      |  10 +-
 fastdeploy/vision/detection/contrib/yolov7.cc      |  10 +-
 .../detection/contrib/yolov7end2end_ort.cc         |   8 +-
 .../detection/contrib/yolov7end2end_trt.cc         |  10 +-
 fastdeploy/vision/detection/contrib/yolox.cc       |  10 +-
 fastdeploy/vision/detection/ppdet/ppyoloe.cc       |   6 +-
 17 files changed, 247 insertions(+), 64 deletions(-)
 mode change 100644 => 100755 examples/runtime/README.md
 create mode 100644 examples/runtime/cpp/README.md
 create mode 100644 examples/runtime/python/README.md
 mode change 100644 => 100755 fastdeploy/fastdeploy_model.cc
 mode change 100644 => 100755 fastdeploy/fastdeploy_model.h
 mode change 100644 => 100755 fastdeploy/vision/classification/ppcls/model.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/scaledyolov4.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolor.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov5.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov5lite.cc
 mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov6.cc
 mode change 100644 => 100755 
fastdeploy/vision/detection/contrib/yolov7.cc mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc mode change 100644 => 100755 fastdeploy/vision/detection/contrib/yolox.cc mode change 100644 => 100755 fastdeploy/vision/detection/ppdet/ppyoloe.cc diff --git a/examples/runtime/README.md b/examples/runtime/README.md old mode 100644 new mode 100755 index 18651bd6995..b434bc99eb0 --- a/examples/runtime/README.md +++ b/examples/runtime/README.md @@ -1,5 +1,9 @@ # FastDeploy Runtime examples +FastDeploy Runtime C++ 推理示例如下 + +## Python 示例 + | Example Code | Program Language | Description | | :------- | :------- | :---- | | python/infer_paddle_paddle_inference.py | Python | Deploy Paddle model with Paddle Inference(CPU/GPU) | @@ -8,9 +12,19 @@ | python/infer_paddle_onnxruntime.py | Python | Deploy Paddle model with ONNX Runtime(CPU/GPU) | | python/infer_onnx_openvino.py | Python | Deploy ONNX model with OpenVINO(CPU) | | python/infer_onnx_tensorrt.py | Python | Deploy ONNX model with TensorRT(GPU) | + +## C++ 示例 + +| Example Code | Program Language | Description | +| :------- | :------- | :---- | | cpp/infer_paddle_paddle_inference.cc | C++ | Deploy Paddle model with Paddle Inference(CPU/GPU) | | cpp/infer_paddle_tensorrt.cc | C++ | Deploy Paddle model with TensorRT(GPU) | | cpp/infer_paddle_openvino.cc | C++ | Deploy Paddle model with OpenVINO(CPU | | cpp/infer_paddle_onnxruntime.cc | C++ | Deploy Paddle model with ONNX Runtime(CPU/GPU) | | cpp/infer_onnx_openvino.cc | C++ | Deploy ONNX model with OpenVINO(CPU) | | cpp/infer_onnx_tensorrt.cc | C++ | Deploy ONNX model with TensorRT(GPU) | + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) diff --git a/examples/runtime/cpp/README.md b/examples/runtime/cpp/README.md new file mode 100644 index 00000000000..9de8b1d6271 --- /dev/null +++ b/examples/runtime/cpp/README.md @@ -0,0 +1,121 @@ +# C++推理 + +在运行demo前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本文档以 PaddleClas 分类模型 MobileNetV2 为例展示CPU上的推理示例 + +## 1. 获取模型 + +```bash +wget https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz +tar xvf mobilenetv2.tgz +``` + +## 2. 配置后端 + +如下C++代码保存为`infer_paddle_onnxruntime.cc` + +``` c++ +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // setup option + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.UseOrtBackend(); + runtime_option.SetCpuThreadNum(12); + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! 
" + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.resize(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); + + //get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} +``` +加载完成,会输出提示如下,说明初始化的后端,以及运行的硬件设备 +``` +[INFO] fastdeploy/fastdeploy_runtime.cc(283)::Init Runtime initialized with Backend::OrtBackend in device Device::CPU. +``` + +## 3. 准备CMakeLists.txt + +FastDeploy中包含多个依赖库,直接采用`g++`或编译器编译较为繁杂,推荐使用cmake进行编译配置。示例配置如下, + +```cmake +PROJECT(runtime_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.12) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc) +# 添加FastDeploy库依赖 +target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS}) +``` + +## 4. 编译可执行程序 + +打开命令行终端,进入`infer_paddle_onnxruntime.cc`和`CMakeLists.txt`所在的目录,执行如下命令 + +```bash +mkdir build & cd build +cmake .. -DFASTDEPLOY_INSTALL_DIR=$fastdeploy_cpp_sdk +make -j +``` + +```fastdeploy_cpp_sdk``` 为FastDeploy C++部署库路径 + +编译完成后,使用如下命令执行可得到预测结果 +```bash +./runtime_demo +``` +执行时如提示`error while loading shared libraries: libxxx.so: cannot open shared object file: No such file...`,说明程序执行时没有找到FastDeploy的库路径,可通过执行如下命令,将FastDeploy的库路径添加到环境变量之后,重新执行二进制程序。 +```bash +source /Path/to/fastdeploy_cpp_sdk/fastdeploy_init.sh +``` + +本示例代码在各平台(Windows/Linux/Mac)上通用,但编译过程仅支持(Linux/Mac),Windows上使用msbuild进行编译,具体使用方式参考[Windows平台使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## 其它文档 + +- [Runtime Python 示例](../python) +- [切换模型推理的硬件和后端](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/runtime/python/README.md b/examples/runtime/python/README.md new file mode 100644 index 00000000000..c9692fca6b1 --- /dev/null +++ b/examples/runtime/python/README.md @@ -0,0 +1,53 @@ +# Python推理 + +在运行demo前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本文档以 PaddleClas 分类模型 MobileNetV2 为例展示 CPU 上的推理示例 + +## 1. 获取模型 + +``` python +import fastdeploy as fd + +model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz" +fd.download_and_decompress(model_url, path=".") +``` + +## 2. 
+
+``` python
+import numpy as np
+
+option = fd.RuntimeOption()
+
+option.set_model_path("mobilenetv2/inference.pdmodel",
+                      "mobilenetv2/inference.pdiparams")
+
+# **** CPU 配置 ****
+option.use_cpu()
+option.use_ort_backend()
+option.set_cpu_thread_num(12)
+
+# 初始化构造runtime
+runtime = fd.Runtime(option)
+
+# 获取模型输入名
+input_name = runtime.get_input_info(0).name
+
+# 构造随机数据进行推理
+results = runtime.infer({
+    input_name: np.random.rand(1, 3, 224, 224).astype("float32")
+})
+
+print(results[0].shape)
+```
+加载完成,会输出提示如下,说明初始化的后端,以及运行的硬件设备
+```
+[INFO] fastdeploy/fastdeploy_runtime.cc(283)::Init	Runtime initialized with Backend::OrtBackend in device Device::CPU.
+```
+
+## 其它文档
+
+- [Runtime C++ 示例](../cpp)
+- [切换模型推理的硬件和后端](../../../docs/cn/faq/how_to_change_backend.md)
diff --git a/examples/vision/classification/yolov5cls/README.md b/examples/vision/classification/yolov5cls/README.md
index 9ed02b7286f..468c9d963b1 100644
--- a/examples/vision/classification/yolov5cls/README.md
+++ b/examples/vision/classification/yolov5cls/README.md
@@ -17,8 +17,6 @@
 | [YOLOv5x-cls](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5x-cls.onnx) | 184MB | 79.0% | 94.4% |
 
 
-
-
 ## 详细部署文档
 
 - [Python部署](python)
diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc
old mode 100644
new mode 100755
index ce18b7eb9be..828bc4878da
--- a/fastdeploy/fastdeploy_model.cc
+++ b/fastdeploy/fastdeploy_model.cc
@@ -239,7 +239,7 @@ bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
 }
 
 bool FastDeployModel::Infer() {
-  return Infer(reused_input_tensors, &reused_output_tensors);
+  return Infer(reused_input_tensors_, &reused_output_tensors_);
 }
 
 std::map<std::string, float> FastDeployModel::PrintStatisInfoOfRuntime() {
diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h
old mode 100644
new mode 100755
index 5a6cd3736db..75e67578e5a
--- a/fastdeploy/fastdeploy_model.h
+++ b/fastdeploy/fastdeploy_model.h
@@ -28,7 +28,7 @@ class FASTDEPLOY_DECL FastDeployModel {
   virtual bool Infer(std::vector<FDTensor>& input_tensors,
                      std::vector<FDTensor>* output_tensors);
 
-  /** \brief Inference the model by the runtime. This interface is using class member reused_input_tensors to do inference and writing results to reused_output_tensors
+  /** \brief Inference the model by the runtime. 
This interface is using class member reused_input_tensors_ to do inference and writing results to reused_output_tensors_ */ virtual bool Infer(); @@ -107,17 +107,10 @@ class FASTDEPLOY_DECL FastDeployModel { /** \brief Release reused input/output buffers */ virtual void ReleaseReusedBuffer() { - std::vector().swap(reused_input_tensors); - std::vector().swap(reused_output_tensors); + std::vector().swap(reused_input_tensors_); + std::vector().swap(reused_output_tensors_); } - /** \brief Reused input tensors - */ - std::vector reused_input_tensors; - /** \brief Reused output tensors - */ - std::vector reused_output_tensors; - protected: virtual bool InitRuntime(); virtual bool CreateCpuBackend(); @@ -126,7 +119,11 @@ class FASTDEPLOY_DECL FastDeployModel { virtual bool CreateRKNPUBackend(); bool initialized = false; - std::vector valid_external_backends; + std::vector valid_external_backends_; + // Reused input tensors + std::vector reused_input_tensors_; + // Reused output tensors + std::vector reused_output_tensors_; private: std::shared_ptr runtime_; diff --git a/fastdeploy/vision/classification/ppcls/model.cc b/fastdeploy/vision/classification/ppcls/model.cc old mode 100644 new mode 100755 index 5f88e0a724f..a9a8182e3ea --- a/fastdeploy/vision/classification/ppcls/model.cc +++ b/fastdeploy/vision/classification/ppcls/model.cc @@ -60,18 +60,18 @@ bool PaddleClasModel::Predict(const cv::Mat& im, ClassifyResult* result) { bool PaddleClasModel::BatchPredict(const std::vector& images, std::vector* results) { std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; - if (!Infer(reused_input_tensors, &reused_output_tensors)) { + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { FDERROR << "Failed to inference by runtime." << std::endl; return false; } - if (!postprocessor_.Run(reused_output_tensors, results)) { + if (!postprocessor_.Run(reused_output_tensors_, results)) { FDERROR << "Failed to postprocess the inference results by runtime." << std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/scaledyolov4.cc b/fastdeploy/vision/detection/contrib/scaledyolov4.cc old mode 100644 new mode 100755 index 46413438e82..694d7dd58f6 --- a/fastdeploy/vision/detection/contrib/scaledyolov4.cc +++ b/fastdeploy/vision/detection/contrib/scaledyolov4.cc @@ -84,7 +84,7 @@ bool ScaledYOLOv4::Initialize() { is_scale_up = false; stride = 32; max_wh = 7680.0; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -230,17 +230,17 @@ bool ScaledYOLOv4::Predict(cv::Mat* im, DetectionResult* result, im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." 
<< std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolor.cc b/fastdeploy/vision/detection/contrib/yolor.cc old mode 100644 new mode 100755 index 5852e72067b..31c56f57696 --- a/fastdeploy/vision/detection/contrib/yolor.cc +++ b/fastdeploy/vision/detection/contrib/yolor.cc @@ -83,7 +83,7 @@ bool YOLOR::Initialize() { is_scale_up = false; stride = 32; max_wh = 7680.0; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -227,18 +227,18 @@ bool YOLOR::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5.cc old mode 100644 new mode 100755 index 27f74fd5516..ba5b22363ec --- a/fastdeploy/vision/detection/contrib/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5.cc @@ -93,7 +93,7 @@ bool YOLOv5::Initialize() { stride_ = 32; max_wh_ = 7680.0; multi_label_ = true; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -350,14 +350,14 @@ bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, std::map> im_info; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info, size_, + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info, size_, padding_value_, is_mini_pad_, is_no_pad_, is_scale_up_, stride_, max_wh_, multi_label_)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info, size_, + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info, size_, padding_value_, is_mini_pad_, is_no_pad_, is_scale_up_, stride_, max_wh_, multi_label_)) { FDERROR << "Failed to preprocess input image." << std::endl; @@ -365,13 +365,13 @@ bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors, result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_, result, im_info, conf_threshold, nms_iou_threshold, multi_label_)) { FDERROR << "Failed to post process." 
<< std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov5lite.cc b/fastdeploy/vision/detection/contrib/yolov5lite.cc old mode 100644 new mode 100755 index 6657a2bf90a..f936b8ce50e --- a/fastdeploy/vision/detection/contrib/yolov5lite.cc +++ b/fastdeploy/vision/detection/contrib/yolov5lite.cc @@ -123,7 +123,7 @@ bool YOLOv5Lite::Initialize() { anchor_config = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0}, {116.0, 90.0, 156.0, 198.0, 373.0, 326.0}}; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -426,31 +426,31 @@ bool YOLOv5Lite::Predict(cv::Mat* im, DetectionResult* result, static_cast(mat.Width())}; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } if (is_decode_exported) { - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; } } else { - if (!PostprocessWithDecode(reused_output_tensors[0], result, im_info, + if (!PostprocessWithDecode(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov6.cc b/fastdeploy/vision/detection/contrib/yolov6.cc old mode 100644 new mode 100755 index 70f79b9f47c..9d4f94d51f6 --- a/fastdeploy/vision/detection/contrib/yolov6.cc +++ b/fastdeploy/vision/detection/contrib/yolov6.cc @@ -96,7 +96,7 @@ bool YOLOv6::Initialize() { is_scale_up = false; stride = 32; max_wh = 4096.0f; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -311,24 +311,24 @@ bool YOLOv6::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, static_cast(mat.Width())}; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." 
<< std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov7.cc b/fastdeploy/vision/detection/contrib/yolov7.cc old mode 100644 new mode 100755 index c3fc1de4148..5b4ca4d46bb --- a/fastdeploy/vision/detection/contrib/yolov7.cc +++ b/fastdeploy/vision/detection/contrib/yolov7.cc @@ -94,7 +94,7 @@ bool YOLOv7::Initialize() { is_scale_up = false; stride = 32; max_wh = 7680.0; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -313,24 +313,24 @@ bool YOLOv7::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, static_cast(mat.Width())}; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc old mode 100644 new mode 100755 index 80c14e9a2c7..6a86000e94b --- a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc @@ -86,7 +86,7 @@ bool YOLOv7End2EndORT::Initialize() { is_no_pad = false; is_scale_up = false; stride = 32; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -224,18 +224,18 @@ bool YOLOv7End2EndORT::Predict(cv::Mat* im, DetectionResult* result, im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold)) { + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold)) { FDERROR << "Failed to post process." 
<< std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc old mode 100644 new mode 100755 index 6fabd53812a..671d00ddb84 --- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc @@ -106,7 +106,7 @@ bool YOLOv7End2EndTRT::Initialize() { is_no_pad = false; is_scale_up = false; stride = 32; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -320,24 +320,24 @@ bool YOLOv7End2EndTRT::Predict(cv::Mat* im, DetectionResult* result, static_cast(mat.Width())}; if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } else { - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } - if (!Postprocess(reused_output_tensors, result, im_info, conf_threshold)) { + if (!Postprocess(reused_output_tensors_, result, im_info, conf_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolox.cc b/fastdeploy/vision/detection/contrib/yolox.cc old mode 100644 new mode 100755 index afac5d671ee..c1c07182633 --- a/fastdeploy/vision/detection/contrib/yolox.cc +++ b/fastdeploy/vision/detection/contrib/yolox.cc @@ -96,7 +96,7 @@ bool YOLOX::Initialize() { downsample_strides = {8, 16, 32}; max_wh = 4096.0f; is_decode_exported = false; - reused_input_tensors.resize(1); + reused_input_tensors_.resize(1); if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; @@ -290,25 +290,25 @@ bool YOLOX::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; - if (!Preprocess(&mat, &reused_input_tensors[0], &im_info)) { + if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } - reused_input_tensors[0].name = InputInfoOfRuntime(0).name; + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; if (!Infer()) { FDERROR << "Failed to inference." << std::endl; return false; } if (is_decode_exported) { - if (!Postprocess(reused_output_tensors[0], result, im_info, conf_threshold, + if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." << std::endl; return false; } } else { - if (!PostprocessWithDecode(reused_output_tensors[0], result, im_info, + if (!PostprocessWithDecode(reused_output_tensors_[0], result, im_info, conf_threshold, nms_iou_threshold)) { FDERROR << "Failed to post process." 
<< std::endl;
     return false;
diff --git a/fastdeploy/vision/detection/ppdet/ppyoloe.cc b/fastdeploy/vision/detection/ppdet/ppyoloe.cc
old mode 100644
new mode 100755
index 77400c739d7..00a82ace528
--- a/fastdeploy/vision/detection/ppdet/ppyoloe.cc
+++ b/fastdeploy/vision/detection/ppdet/ppyoloe.cc
@@ -55,7 +55,7 @@ bool PPYOLOE::Initialize() {
     FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
     return false;
   }
-  reused_input_tensors.resize(2);
+  reused_input_tensors_.resize(2);
   return true;
 }
@@ -252,7 +252,7 @@ bool PPYOLOE::Postprocess(std::vector<FDTensor>& infer_result,
 bool PPYOLOE::Predict(cv::Mat* im, DetectionResult* result) {
   Mat mat(*im);
-  if (!Preprocess(&mat, &reused_input_tensors)) {
+  if (!Preprocess(&mat, &reused_input_tensors_)) {
     FDERROR << "Failed to preprocess input data while using model:"
             << ModelName() << "." << std::endl;
     return false;
@@ -264,7 +264,7 @@ bool PPYOLOE::Predict(cv::Mat* im, DetectionResult* result) {
     return false;
   }
-  if (!Postprocess(reused_output_tensors, result)) {
+  if (!Postprocess(reused_output_tensors_, result)) {
     FDERROR << "Failed to postprocess while using model:" << ModelName()
             << "." << std::endl;
     return false;

From c5c741dd032f8dc2396f5e9796ed8bf3baf283cb Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Wed, 9 Nov 2022 06:28:41 +0000
Subject: [PATCH 14/50] fixed docs

---
 .../download_prebuilt_libraries.md            |  5 +++-
 .../download_prebuilt_libraries.md            | 25 +++++++++++--------
 2 files changed, 18 insertions(+), 12 deletions(-)
 mode change 100644 => 100755 docs/en/build_and_install/download_prebuilt_libraries.md

diff --git a/docs/cn/build_and_install/download_prebuilt_libraries.md b/docs/cn/build_and_install/download_prebuilt_libraries.md
index 1f89a023f61..40a5878ec2a 100755
--- a/docs/cn/build_and_install/download_prebuilt_libraries.md
+++ b/docs/cn/build_and_install/download_prebuilt_libraries.md
@@ -1,8 +1,11 @@
-
 # 预编译库安装

 FastDeploy提供各平台预编译库，供开发者直接下载安装使用。当然FastDeploy编译也非常容易，开发者也可根据自身需求编译FastDeploy。

+本文分为两部分:
+- [1. GPU部署环境](#gpu部署环境)
+- [2. CPU部署环境](#cpu部署环境)
+
 ## GPU部署环境

 ### 环境要求
diff --git a/docs/en/build_and_install/download_prebuilt_libraries.md b/docs/en/build_and_install/download_prebuilt_libraries.md
old mode 100644
new mode 100755
index 39cc0156c52..b9cbed42b30
--- a/docs/en/build_and_install/download_prebuilt_libraries.md
+++ b/docs/en/build_and_install/download_prebuilt_libraries.md
@@ -1,8 +1,11 @@
-
 # How to Install Prebuilt Library

 FastDeploy provides pre-built libraries for developers to download and install directly. Meanwhile, FastDeploy also offers easy access to compile so that developers can compile FastDeploy according to their own needs.

+This article is divided into two parts:
+- [1. GPU Deployment Environment](#gpu-deployment-environment)
+- [2. CPU Deployment Environment](#cpu-deployment-environment)
+
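+Once an environment below is installed, a quick import check confirms the wheel is usable (an illustrative sanity check, not an official step):
+
+```
+python -c "import fastdeploy"
+```
+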
 ## GPU Deployment Environment

 ### Environment Requirement
@@ -16,10 +19,10 @@ FastDeploy supports Computer Vision, Text and NLP model deployment on CPU and Nv

 ### Python SDK

-Install the released version(the newest 0.4.0 for now)
+Install the released version (the newest 0.5.0 for now)

 ```
-pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html 
+pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
 ```

 Install the Develop version(Nightly build)
@@ -36,12 +39,12 @@ conda config --add channels conda-forge && conda install cudatoolkit=11.2 cudnn=

 ### C++ SDK

-Install the released version(Latest 0.4.0)
+Install the released version (Latest 0.5.0)

 | Platform    | File                                                                                                                    | Description                                                |
 |:----------- |:----------------------------------------------------------------------------------------------------------------------- |:--------------------------------------------------------- |
-| Linux x64   | [fastdeploy-linux-x64-gpu-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.4.0.tgz)   | Compiled from g++ 8.2, CUDA 11.2, cuDNN 8.2                |
-| Windows x64 | [fastdeploy-win-x64-gpu-0.4.0.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-0.4.0.zip)       | Compiled from Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2  |
+| Linux x64   | [fastdeploy-linux-x64-gpu-0.5.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.5.0.tgz)   | Compiled from g++ 8.2, CUDA 11.2, cuDNN 8.2                |
+| Windows x64 | [fastdeploy-win-x64-gpu-0.5.0.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-0.5.0.zip)       | Compiled from Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2  |

 Install the Develop version(Nightly build)
@@ -61,7 +64,7 @@ FastDeploy supports computer vision, text and NLP model deployment on CPU with P

 ### Python SDK

-Install the released version(Latest 0.4.0 for now)
+Install the released version (Latest 0.5.0 for now)

 ```
 pip install fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
 ```
@@ -75,15 +78,15 @@ pip install fastdeploy-python==0.0.0 -f https://www.paddlepaddle.org.cn/whl/fast

 ### C++ SDK

-Install the released version(Latest 0.4.0 for now)
+Install the released version (Latest 0.5.0 for now, Android is 0.4.0 pre-release)

 | Platform      | File                                                                                                                    | Description                    |
 |:------------- |:----------------------------------------------------------------------------------------------------------------------- |:------------------------------ |
-| Linux x64     | [fastdeploy-linux-x64-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-0.4.0.tgz)           | Compiled from g++ 8.2          |
-| Windows x64   | [fastdeploy-win-x64-0.4.0.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-0.4.0.zip)               | Compiled from Visual Studio 16 |
+| Linux x64     | [fastdeploy-linux-x64-0.5.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-0.5.0.tgz)           | Compiled from g++ 8.2          |
+| Windows x64   | [fastdeploy-win-x64-0.5.0.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-0.5.0.zip)               | Compiled from Visual Studio 16 |
 | Mac OSX x64   | [fastdeploy-osx-x86_64-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-0.4.0.tgz)         | -                              |
 | Mac OSX arm64 | [fastdeploy-osx-arm64-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-0.4.0.tgz)           | -                              |
-| Linux aarch64 | [fastdeploy-linux-aarch64-0.2.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-0.2.0.tgz) | Compiled from g++ 6.3.0 | +| Linux aarch64 | [fastdeploy-linux-aarch64-0.4.0.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-0.4.0.tgz) | Compiled from g++ 6.3.0 | | Android armv7&v8 | [fastdeploy-android-0.4.0-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-0.4.0-shared.tgz) | Compiled from NDK 25 and clang++, support arm64-v8a and armeabi-v7a | Install the Develop version(Nightly build) From d4ebde0665ba8bf301fa2bdbdc79639cc4c1c4e2 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 9 Nov 2022 08:38:37 +0000 Subject: [PATCH 15/50] fixed headpose typo --- examples/vision/headpose/README.md | 2 +- examples/vision/headpose/fsanet/cpp/README.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) mode change 100644 => 100755 examples/vision/headpose/README.md mode change 100644 => 100755 examples/vision/headpose/fsanet/cpp/README.md diff --git a/examples/vision/headpose/README.md b/examples/vision/headpose/README.md old mode 100644 new mode 100755 index d4be67871c3..b727752e0a5 --- a/examples/vision/headpose/README.md +++ b/examples/vision/headpose/README.md @@ -1,6 +1,6 @@ # 头部姿态模型 -FastDeploy目前支持如下人脸对齐模型部署 +FastDeploy目前支持如下头部姿态模型部署 | 模型 | 说明 | 模型格式 | 版本 | | :--- | :--- | :------- | :--- | diff --git a/examples/vision/headpose/fsanet/cpp/README.md b/examples/vision/headpose/fsanet/cpp/README.md old mode 100644 new mode 100755 index 9fc719192be..1a3a5176992 --- a/examples/vision/headpose/fsanet/cpp/README.md +++ b/examples/vision/headpose/fsanet/cpp/README.md @@ -68,6 +68,7 @@ FSANet模型加载和初始化,其中model_file为导出的ONNX模型格式。 ### 类成员变量 用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 > > * **size**(vector<int>): 通过此参数修改预处理过程中resize的大小,包含两个整型元素,表示[width, height], 默认值为[112, 112] + - [模型介绍](../../) - [Python部署](../python) - [视觉模型预测结果](../../../../../docs/api/vision_results/) From 6f653daef4f98ec450cde3230f244f5ecdcf6473 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 9 Nov 2022 08:41:49 +0000 Subject: [PATCH 16/50] fixed typo --- examples/text/ernie-3.0/python/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 examples/text/ernie-3.0/python/requirements.txt diff --git a/examples/text/ernie-3.0/python/requirements.txt b/examples/text/ernie-3.0/python/requirements.txt old mode 100644 new mode 100755 index 204cf718cdf..29711008e23 --- a/examples/text/ernie-3.0/python/requirements.txt +++ b/examples/text/ernie-3.0/python/requirements.txt @@ -1,2 +1,2 @@ -faster_toeknizer +faster_tokenizer paddlenlp From 005897cb08960580786bc7f6d95c26b32b439959 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 9 Nov 2022 14:26:00 +0000 Subject: [PATCH 17/50] refactor yolov5 --- .../detection/contrib/yolov5/postprocessor.cc | 145 ++++++++++++ .../detection/contrib/yolov5/postprocessor.h | 80 +++++++ .../detection/contrib/yolov5/preprocessor.cc | 206 ++++++++++++++++++ .../detection/contrib/yolov5/preprocessor.h | 114 ++++++++++ .../vision/detection/contrib/yolov5/yolov5.cc | 99 +++++++++ .../vision/detection/contrib/yolov5/yolov5.h | 90 ++++++++ .../detection/contrib/yolov5/yolov5_pybind.cc | 73 +++++++ 7 files changed, 807 insertions(+) create mode 100755 fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc create mode 100755 fastdeploy/vision/detection/contrib/yolov5/postprocessor.h create mode 100755 fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc create mode 
100755 fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov5/yolov5.cc
 create mode 100755 fastdeploy/vision/detection/contrib/yolov5/yolov5.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc

diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
new file mode 100755
index 00000000000..9f892cd40e0
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
@@ -0,0 +1,145 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/detection/contrib/yolov5/postprocessor.h"
+#include "fastdeploy/vision/utils/utils.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace detection {
+
+YOLOv5Postprocessor::YOLOv5Postprocessor() {
+  conf_threshold_ = 0.25;
+  nms_threshold_ = 0.5;
+  multi_label_ = true;
+  initialized_ = true;
+  max_wh_ = 7680.0;
+}
+
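+// Layout note (inferred from the indexing in Postprocess below): the model
+// output is expected to be [batch, num_boxes, 5 + num_classes], where each
+// box row is [cx, cy, w, h, objectness, class scores...] and the final
+// confidence is objectness * class_score.
+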
+bool YOLOv5Postprocessor::Postprocess(
+    const std::vector<FDTensor>& infer_results,
+    std::vector<DetectionResult>* results,
+    const std::map<std::string, std::array<float, 2>>& im_info) {
+  auto& infer_result = infer_results[0];
+  for (size_t bs = 0; bs < results->size(); ++bs) {
+    (*results)[bs].Clear();
+    if (multi_label_) {
+      (*results)[bs].Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5));
+    } else {
+      (*results)[bs].Reserve(infer_result.shape[1]);
+    }
+    if (infer_result.dtype != FDDataType::FP32) {
+      FDERROR << "Only support post process with float32 data." << std::endl;
+      return false;
+    }
+    float* data = static_cast<float*>(infer_result.Data()) +
+                  bs * infer_result.shape[1] * infer_result.shape[2];
+    for (size_t i = 0; i < infer_result.shape[1]; ++i) {
+      int s = i * infer_result.shape[2];
+      float confidence = data[s + 4];
+      if (multi_label_) {
+        for (size_t j = 5; j < infer_result.shape[2]; ++j) {
+          confidence = data[s + 4];
+          float* class_score = data + s + j;
+          confidence *= (*class_score);
+          // filter boxes by conf_threshold
+          if (confidence <= conf_threshold_) {
+            continue;
+          }
+          int32_t label_id = std::distance(data + s + 5, class_score);
+
+          // convert from [x, y, w, h] to [x1, y1, x2, y2]
+          (*results)[bs].boxes.emplace_back(std::array<float, 4>{
+              data[s] - data[s + 2] / 2.0f + label_id * max_wh_,
+              data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_,
+              data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_,
+              data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_});
+          (*results)[bs].label_ids.push_back(label_id);
+          (*results)[bs].scores.push_back(confidence);
+        }
+      } else {
+        float* max_class_score =
+            std::max_element(data + s + 5, data + s + infer_result.shape[2]);
+        confidence *= (*max_class_score);
+        // filter boxes by conf_threshold
+        if (confidence <= conf_threshold_) {
+          continue;
+        }
+        int32_t label_id = std::distance(data + s + 5, max_class_score);
+        // convert from [x, y, w, h] to [x1, y1, x2, y2]
+        (*results)[bs].boxes.emplace_back(std::array<float, 4>{
+            data[s] - data[s + 2] / 2.0f + label_id * max_wh_,
+            data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_,
+            data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_,
+            data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_});
+        (*results)[bs].label_ids.push_back(label_id);
+        (*results)[bs].scores.push_back(confidence);
+      }
+    }
+
+    if ((*results)[bs].boxes.size() == 0) {
+      return true;
+    }
+
+    utils::NMS(&((*results)[bs]), nms_threshold_);
+
+    // scale the boxes to the origin image shape
+    auto iter_out = im_info.find("output_shape");
+    auto iter_ipt = im_info.find("input_shape");
+    FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
+             "Cannot find input_shape or output_shape from im_info.");
+    float out_h = iter_out->second[0];
+    float out_w = iter_out->second[1];
+    float ipt_h = iter_ipt->second[0];
+    float ipt_w = iter_ipt->second[1];
+    float scale = std::min(out_h / ipt_h, out_w / ipt_w);
+    for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) {
+      float pad_h = (out_h - ipt_h * scale) / 2;
+      float pad_w = (out_w - ipt_w * scale) / 2;
+      int32_t label_id = (*results)[bs].label_ids[i];
+      // clip box
+      (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh_ * label_id;
+      (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh_ * label_id;
+      (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh_ * label_id;
+      (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh_ * label_id;
+      (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f);
+      (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f);
+      (*results)[bs].boxes[i][2] = std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f);
+      (*results)[bs].boxes[i][3] = std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f);
+      (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w);
+      (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h);
+      (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w);
+      (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h);
+    }
+  }
+  return true;
+}
+
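+// The label_id * max_wh_ offset applied in Postprocess is a common single-pass
+// trick for class-aware NMS: boxes of different classes are shifted into
+// disjoint coordinate ranges so that one utils::NMS call never suppresses
+// across classes; the offset is subtracted again before rescaling the boxes.
+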
+bool YOLOv5Postprocessor::Run(const std::vector<FDTensor>& tensors,
+                              std::vector<DetectionResult>* results,
+                              std::map<std::string, std::array<float, 2>>* im_info) {
+  if (!initialized_) {
+    FDERROR << "Postprocessor is not initialized." << std::endl;
+    return false;
+  }
+
+  int batch = tensors[0].shape[0];
+
+  results->resize(batch);
+
+  if (!Postprocess(tensors, results, &im_info)) {
+    FDERROR << "Failed to postprocess the inference results." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+}  // namespace detection
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
new file mode 100755
index 00000000000..476f477f671
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
@@ -0,0 +1,80 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace detection {
+/*! @brief Postprocessor object for YOLOv5 series models.
+ */
+class FASTDEPLOY_DECL YOLOv5Postprocessor {
+ public:
+  /** \brief Create a postprocessor instance for YOLOv5 series models
+   */
+  YOLOv5Postprocessor();
+
+  /** \brief Process the result of runtime and fill to DetectionResult structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] result The output result of detection
+   * \param[in] im_info record input_shape and output_shape
+   * \return true if the postprocess succeeded, otherwise false
+   */
+  bool Run(const std::vector<FDTensor>& tensors,
+           std::vector<DetectionResult>* results,
+           std::map<std::string, std::array<float, 2>>* im_info);
+
+  /// Set conf_threshold, default 0.25
+  void SetConfThreshold(float conf_threshold) {
+    conf_threshold_ = conf_threshold;
+  }
+
+  /// Get conf_threshold, default 0.25
+  float GetConfThreshold() const { return conf_threshold_; }
+
+  /// Set nms_threshold, default 0.5
+  void SetNMSThreshold(float nms_threshold) {
+    nms_threshold_ = nms_threshold;
+  }
+
+  /// Get nms_threshold, default 0.5
+  float GetNMSThreshold() const { return nms_threshold_; }
+
+  /// Set multi_label, default true
+  void SetMultiLabel(bool multi_label) {
+    multi_label_ = multi_label;
+  }
+
+  /// Get multi_label, default true
+  bool GetMultiLabel() const { return multi_label_; }
+
+ private:
+  bool Postprocess(const std::vector<FDTensor>& tensors,
+                   std::vector<DetectionResult>* results,
+                   const std::map<std::string, std::array<float, 2>>& im_info);
+
+  bool initialized_ = false;
+  float conf_threshold_;
+  float nms_threshold_;
+  bool multi_label_;
+  float max_wh_;
+};
+
+}  // namespace detection
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
new file mode 100755
index 00000000000..506d0decdce
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
@@ -0,0 +1,206 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/detection/contrib/yolov5/preprocessor.h" +#include "fastdeploy/function/concat.h" +#ifdef ENABLE_CUDA_PREPROCESS +#include "fastdeploy/vision/utils/cuda_utils.h" +#endif // ENABLE_CUDA_PREPROCESS + +namespace fastdeploy { +namespace vision { +namespace detection { + +YOLOv5Preprocessor::YOLOv5Preprocessor() { + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = false; + stride_ = 32; + max_wh_ = 7680.0; + initialized_ = true; +} + +void YOLOv5Preprocessor::LetterBox(FDMat* mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + Resize::Run(mat, resize_w, resize_h); + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, + std::map>* im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + // process after image load + double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), + static_cast(mat->Width())); + if (ratio != 1.0) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + // yolov5's preprocess steps + // 1. letterbox + // 2. BGR->RGB + // 3. 
HWC->CHW + LetterBox(mat); + BGR2RGB::Run(mat); + // Compute `result = mat * alpha + beta` directly by channel + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + Convert::Run(mat, alpha, beta); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + mat->ShareWithTensor(output); + output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + return true; +} + +void YOLOv5Preprocessor::UseCudaPreprocessing(int max_image_size) { +#ifdef ENABLE_CUDA_PREPROCESS + use_cuda_preprocessing_ = true; + is_scale_up_ = true; + if (input_img_cuda_buffer_host_ == nullptr) { + // prepare input data cache in GPU pinned memory + CUDA_CHECK(cudaMallocHost((void**)&input_img_cuda_buffer_host_, + max_image_size * 3)); + // prepare input data cache in GPU device memory + CUDA_CHECK( + cudaMalloc((void**)&input_img_cuda_buffer_device_, max_image_size * 3)); + CUDA_CHECK(cudaMalloc((void**)&input_tensor_cuda_buffer_device_, + 3 * size_[0] * size_[1] * sizeof(float))); + } +#else + FDWARNING << "The FastDeploy didn't compile with BUILD_CUDA_SRC=ON." + << std::endl; + use_cuda_preprocessing_ = false; +#endif +} + +bool YOLOv5Preprocessor::CudaPreprocess(FDMat* mat, FDTensor* output, + std::map>* im_info) { +#ifdef ENABLE_CUDA_PREPROCESS + if (is_mini_pad_ != false || is_no_pad_ != false || is_scale_up_ != true) { + FDERROR << "Preprocessing with CUDA is only available when the arguments " + "satisfy (is_mini_pad_=false, is_no_pad_=false, is_scale_up_=true)." + << std::endl; + return false; + } + + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + cudaStream_t stream = reinterpret_cast(cuda_stream_); + int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); + memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); + CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, + input_img_cuda_buffer_host_, src_img_buf_size, + cudaMemcpyHostToDevice, stream)); + utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), + mat->Height(), input_tensor_cuda_buffer_device_, + size_[0], size_[1], padding_value_, stream); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(size_[0]), + static_cast(size_[1])}; + + output->SetExternalData({mat->Channels(), size_[0], size_[1]}, FDDataType::FP32, + input_tensor_cuda_buffer_device_); + output->device = Device::GPU; + output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + return true; +#else + FDERROR << "CUDA src code was not enabled." << std::endl; + return false; +#endif // ENABLE_CUDA_PREPROCESS +} + +bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* outputs, + std::map>* im_info) { + if (!initialized_) { + FDERROR << "The preprocessor is not initialized." << std::endl; + return false; + } + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." << std::endl; + return false; + } + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (use_cuda_preprocessing_) { + if (!CudaPreprocess(&(*images)[i], &tensors[i], im_info)) { + FDERROR << "Failed to preprocess input image." 
<< std::endl;
+        return false;
+      }
+    } else {
+      if (!Preprocess(&(*images)[i], &tensors[i], im_info)) {
+        FDERROR << "Failed to preprocess input image." << std::endl;
+        return false;
+      }
+    }
+  }
+
+  if (tensors.size() == 1) {
+    (*outputs)[0] = std::move(tensors[0]);
+  } else {
+    function::Concat(tensors, &((*outputs)[0]), 0);
+  }
+  return true;
+}
+
+}  // namespace detection
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
new file mode 100755
index 00000000000..a69002966b0
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
@@ -0,0 +1,114 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace detection {
+/*! @brief Preprocessor object for YOLOv5 series models.
+ */
+class FASTDEPLOY_DECL YOLOv5Preprocessor {
+ public:
+  /** \brief Create a preprocessor instance for YOLOv5 series models
+   */
+  YOLOv5Preprocessor();
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] images The input image data list, all the elements are returned by cv::imread()
+   * \param[in] outputs The output tensors which will feed in runtime
+   * \param[in] im_info record input_shape and output_shape
+   * \return true if the preprocess succeeded, otherwise false
+   */
+  bool Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+           std::map<std::string, std::array<float, 2>>* im_info);
+
+  /// Set target size, tuple of (width, height), default size = {640, 640}
+  void SetSize(std::vector<int> size) { size_ = size; }
+
+  /// Get target size, tuple of (width, height), default size = {640, 640}
+  std::vector<int> GetSize() const { return size_; }
+
+  /// Set padding value, size should be the same as channels
+  void SetPaddingValue(std::vector<float> padding_value) {
+    padding_value_ = padding_value;
+  }
+
+  /// Get padding value, size should be the same as channels
+  std::vector<float> GetPaddingValue() const { return padding_value_; }
+
+ private:
+  bool Preprocess(FDMat* mat, FDTensor* output,
+                  std::map<std::string, std::array<float, 2>>* im_info);
+
+  void UseCudaPreprocessing(int max_img_size = 3840 * 2160);
+
+  bool CudaPreprocess(FDMat* mat, FDTensor* output,
+                      std::map<std::string, std::array<float, 2>>* im_info);
+
+  bool IsDynamicInput() const { return is_dynamic_input_; }
+
+  void LetterBox(FDMat* mat);
+
+  bool initialized_ = false;
+  // target size, tuple of (width, height), default size = {640, 640}
+  std::vector<int> size_;
+
+  // padding value, size should be the same as channels
+  std::vector<float> padding_value_;
+
+  // only pad to the minimum rectangle whose height and width are multiples of stride
+  bool is_mini_pad_;
+
+  // while is_mini_pad = false and is_no_pad = true,
+  // will resize the image to the set size
+  bool is_no_pad_;
+
+  // if is_scale_up is false, the input image can only be scaled down; the maximum
resize scale cannot exceed 1.0 + bool is_scale_up_; + + // padding stride, for is_mini_pad + int stride_; + + // for offseting the boxes by classes when using NMS + float max_wh_; + + // whether to inference with dynamic shape (e.g ONNX export with dynamic shape + // or not.) + // YOLOv5 official 'export_onnx.py' script will export dynamic ONNX by + // default. + // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This + // value will + // auto check by fastdeploy after the internal Runtime already initialized. + bool is_dynamic_input_; + // CUDA host buffer for input image + uint8_t* input_img_cuda_buffer_host_ = nullptr; + // CUDA device buffer for input image + uint8_t* input_img_cuda_buffer_device_ = nullptr; + // CUDA device buffer for TRT input tensor + float* input_tensor_cuda_buffer_device_ = nullptr; + // Whether to use CUDA preprocessing + bool use_cuda_preprocessing_ = false; + // CUDA stream + void* cuda_stream_ = nullptr; +}; + +} // namespace detection +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc new file mode 100755 index 00000000000..2e311cb111c --- /dev/null +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -0,0 +1,99 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/detection/contrib/yolov5.h" + +namespace fastdeploy { +namespace vision { +namespace detection { + +YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file, + const RuntimeOption& custom_option, + const ModelFormat& model_format) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + } + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; +#ifdef ENABLE_CUDA_PREPROCESS + cudaSetDevice(runtime_option.device_id); + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + cuda_stream_ = reinterpret_cast(stream); + runtime_option.SetExternalStream(cuda_stream_); +#endif // ENABLE_CUDA_PREPROCESS + initialized = Initialize(); +} + +bool YOLOv5::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize fastdeploy backend." 
<< std::endl; + return false; + } + return true; +} + +YOLOv5::~YOLOv5() { +#ifdef ENABLE_CUDA_PREPROCESS + if (use_cuda_preprocessing_) { + CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); + CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); + CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); + CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); + } +#endif // ENABLE_CUDA_PREPROCESS +} + +bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, float nms_threshold) { + postprocessor_.SetConfThreshold(conf_threshold); + postprocessor_.SetNMSThreshold(nms_threshold); + +} + +YOLOv5::Predict(const cv::Mat* im, DetectionResult* result) { + + } + +YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { + std::map> im_info; + + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &im_info)) { + FDERROR << "Failed to preprocess the input image." << std::endl; + return false; + } + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." << std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, results, &im_info)) { + FDERROR << "Failed to postprocess the inference results by runtime." << std::endl; + return false; + } + + return true; +} + +} // namespace detection +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h new file mode 100755 index 00000000000..78621398881 --- /dev/null +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h @@ -0,0 +1,90 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/vision/detection/contrib/yolov5/preprocessor.h" +#include "fastdeploy/vision/detection/contrib/yolov5/postprocessor.h" + +namespace fastdeploy { +namespace vision { +namespace detection { +/*! @brief YOLOv5 model object used when to load a YOLOv5 model exported by YOLOv5. + */ +class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { + public: + /** \brief Set path of model file and the configuration of runtime. 
+   *
+   * \param[in] model_file Path of model file, e.g ./yolov5.onnx
+   * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
+  YOLOv5(const std::string& model_file, const std::string& params_file = "",
+         const RuntimeOption& custom_option = RuntimeOption(),
+         const ModelFormat& model_format = ModelFormat::ONNX);
+
+  ~YOLOv5();
+
+  std::string ModelName() const { return "yolov5"; }
+
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_threshold iou threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(cv::Mat* im, DetectionResult* result,
+                       float conf_threshold = 0.25,
+                       float nms_threshold = 0.5);
+
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(const cv::Mat& img, DetectionResult* result);
+
+  /** \brief Predict the detection results for a batch of input images
+   *
+   * \param[in] imgs The input image list, each element comes from cv::imread()
+   * \param[in] results The output detection result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat>& imgs,
+                            std::vector<DetectionResult>* results);
+
+  /// Get preprocessor reference of YOLOv5
+  virtual YOLOv5Preprocessor& GetPreprocessor() {
+    return preprocessor_;
+  }
+
+  /// Get postprocessor reference of YOLOv5
+  virtual YOLOv5Postprocessor& GetPostprocessor() {
+    return postprocessor_;
+  }
+
+ protected:
+  bool Initialize();
+  YOLOv5Preprocessor preprocessor_;
+  YOLOv5Postprocessor postprocessor_;
+};
+
+}  // namespace detection
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
new file mode 100755
index 00000000000..19e59d1c670
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
@@ -0,0 +1,73 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
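+
+// Python bindings for the YOLOv5 detector. From the Python side the class is
+// exposed as fastdeploy.vision.detection.YOLOv5; an illustrative call (the
+// model path is a placeholder):
+//
+//   model = fd.vision.detection.YOLOv5("yolov5s.onnx")
+//   result = model.predict(image, conf_threshold=0.25, nms_iou_threshold=0.5)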
+ +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { +void BindYOLOv5(pybind11::module& m) { + pybind11::class_(m, "YOLOv5") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv5& self, pybind11::array& data, + float conf_threshold, float nms_threshold) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(&mat, &res, conf_threshold, nms_threshold); + return res; + }) + .def("use_cuda_preprocessing", + [](vision::detection::YOLOv5& self, int max_image_size) { + self.UseCudaPreprocessing(max_image_size); + }) + .def_static("preprocess", + [](pybind11::array& data, const std::vector& size, + const std::vector padding_value, bool is_mini_pad, + bool is_no_pad, bool is_scale_up, int stride, float max_wh, + bool multi_label) { + auto mat = PyArrayToCvMat(data); + fastdeploy::vision::Mat fd_mat(mat); + FDTensor output; + std::map> im_info; + vision::detection::YOLOv5::Preprocess( + &fd_mat, &output, &im_info, size, padding_value, + is_mini_pad, is_no_pad, is_scale_up, stride, max_wh, + multi_label); + return make_pair(TensorToPyArray(output), im_info); + }) + .def_static( + "postprocess", + [](std::vector infer_results, + const std::map>& im_info, + float conf_threshold, float nms_threshold, bool multi_label, + float max_wh) { + std::vector fd_infer_results(infer_results.size()); + PyArrayToTensorList(infer_results, &fd_infer_results, true); + vision::DetectionResult result; + vision::detection::YOLOv5::Postprocess( + fd_infer_results, &result, im_info, conf_threshold, + nms_threshold, multi_label, max_wh); + return result; + }) + .def_readwrite("size", &vision::detection::YOLOv5::size_) + .def_readwrite("padding_value", + &vision::detection::YOLOv5::padding_value_) + .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad_) + .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad_) + .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up_) + .def_readwrite("stride", &vision::detection::YOLOv5::stride_) + .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh_) + .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label_); +} +} // namespace fastdeploy From 0509089941ae5a6fc2f9782d8023007900de755a Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 9 Nov 2022 14:31:37 +0000 Subject: [PATCH 18/50] update model infer --- .../vision/detection/contrib/yolov5/yolov5.cc | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index 2e311cb111c..e10ceb9ed2c 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -64,17 +64,25 @@ YOLOv5::~YOLOv5() { bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, float nms_threshold) { postprocessor_.SetConfThreshold(conf_threshold); postprocessor_.SetNMSThreshold(nms_threshold); - + if (!Predict(*im, result)) { + return false; + } + return true; } -YOLOv5::Predict(const cv::Mat* im, DetectionResult* result) { - - } +bool YOLOv5::Predict(const cv::Mat& im, DetectionResult* result) { + std::vector results; + if (!BatchPredict({im}, &results)) { + return false; + } + *result = std::move(results[0]); + return true; +} -YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { +bool YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { std::map> im_info; - std::vector fd_images = 
WrapMat(images);
+
   if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &im_info)) {
     FDERROR << "Failed to preprocess the input image." << std::endl;
     return false;

From febb2c14f7b9acc10373bfb5d2e34712f5b1ec19 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Thu, 10 Nov 2022 08:49:31 +0000
Subject: [PATCH 19/50] refactor pybind for yolov5

---
 .../detection/contrib/yolov5/postprocessor.cc |   4 +-
 .../detection/contrib/yolov5/postprocessor.h  |   2 +-
 .../detection/contrib/yolov5/preprocessor.cc  |   4 +-
 .../detection/contrib/yolov5/preprocessor.h   |   5 +-
 .../detection/contrib/yolov5/yolov5_pybind.cc | 105 ++++----
 .../vision/detection/contrib/yolov5.py        | 246 +++++++++---------
 6 files changed, 195 insertions(+), 171 deletions(-)

diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
index 9f892cd40e0..efc5157c117 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc
@@ -125,7 +125,7 @@ bool YOLOv5Postprocessor::Postprocess(
 }

 bool YOLOv5Postprocessor::Run(const std::vector<FDTensor>& tensors,
                               std::vector<DetectionResult>* results,
-                              std::map<std::string, std::array<float, 2>>* im_info) {
+                              const std::map<std::string, std::array<float, 2>>& im_info) {
   if (!initialized_) {
     FDERROR << "Postprocessor is not initialized." << std::endl;
     return false;
@@ -135,7 +135,7 @@ bool YOLOv5Postprocessor::Run(const std::vector<FDTensor>& tensors,

   results->resize(batch);

-  if (!Postprocess(tensors, results, &im_info)) {
+  if (!Postprocess(tensors, results, im_info)) {
     FDERROR << "Failed to postprocess the inference results." << std::endl;
     return false;
   }
diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
index 476f477f671..c40624820b6 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
+++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h
@@ -37,7 +37,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor {
    */
   bool Run(const std::vector<FDTensor>& tensors,
            std::vector<DetectionResult>* results,
-           std::map<std::string, std::array<float, 2>>* im_info);
+           const std::map<std::string, std::array<float, 2>>& im_info);

   /// Set conf_threshold, default 0.25
   void SetConfThreshold(float conf_threshold) {
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
index 506d0decdce..61de32d2770 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
@@ -102,7 +102,7 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, h, w, c
   return true;
 }

@@ -158,7 +158,7 @@ bool YOLOv5Preprocessor::CudaPreprocess(FDMat* mat, FDTensor* output,
   output->SetExternalData({mat->Channels(), size_[0], size_[1]}, FDDataType::FP32,
                           input_tensor_cuda_buffer_device_);
   output->device = Device::GPU;
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, h, w, c
   return true;
 #else
   FDERROR << "CUDA src code was not enabled." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
index a69002966b0..b57af35909f 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
@@ -52,12 +52,13 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor {
   /// Get padding value, size should be the same as channels
   std::vector<float> GetPaddingValue() const { return padding_value_; }

+  /// Use CUDA preprocessing
+  void UseCudaPreprocessing(int max_img_size = 3840 * 2160);
+
  private:
   bool Preprocess(FDMat* mat, FDTensor* output,
                   std::map<std::string, std::array<float, 2>>* im_info);

-  void UseCudaPreprocessing(int max_img_size = 3840 * 2160);
-
   bool CudaPreprocess(FDMat* mat, FDTensor* output,
                       std::map<std::string, std::array<float, 2>>* im_info);
diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
index 19e59d1c670..21bfd830fd4 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
@@ -16,58 +16,73 @@
 namespace fastdeploy {
 void BindYOLOv5(pybind11::module& m) {
+  pybind11::class_<vision::detection::YOLOv5Preprocessor>(
+      m, "YOLOv5Preprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::detection::YOLOv5Preprocessor& self,
+                     std::vector<pybind11::array>& im_list) {
+        std::vector<vision::FDMat> images;
+        for (size_t i = 0; i < im_list.size(); ++i) {
+          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+        }
+        std::vector<FDTensor> outputs;
+        std::map<std::string, std::array<float, 2>> im_info;
+        if (!self.Run(&images, &outputs, &im_info)) {
+          pybind11::eval("raise Exception('Failed to preprocess the input data in YOLOv5Preprocessor.')");
+        }
+        return make_pair(outputs, im_info);
+      })
+      .def("use_cuda_preprocessing",
+           [](vision::detection::YOLOv5Preprocessor& self, int max_image_size) {
+             self.UseCudaPreprocessing(max_image_size);
+           })
+      .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize,
+                    &vision::detection::YOLOv5Preprocessor::SetSize)
+      .def_property("padding_value",
+                    &vision::detection::YOLOv5Preprocessor::GetPaddingValue,
+                    &vision::detection::YOLOv5Preprocessor::SetPaddingValue);
+
+  pybind11::class_<vision::detection::YOLOv5Postprocessor>(
+      m, "YOLOv5Postprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::detection::YOLOv5Postprocessor& self,
+                     std::vector<FDTensor>& inputs,
+                     const std::map<std::string, std::array<float, 2>>& im_info) {
+        std::vector<vision::DetectionResult> results;
+        if (!self.Run(inputs, &results, im_info)) {
+          pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')");
+        }
+        return results;
+      })
+      .def("run", [](vision::detection::YOLOv5Postprocessor& self,
+                     std::vector<pybind11::array>& input_array,
+                     const std::map<std::string, std::array<float, 2>>& im_info) {
+        std::vector<vision::DetectionResult> results;
+        std::vector<FDTensor> inputs;
+        PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
+        if (!self.Run(inputs, &results, im_info)) {
+          pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')");
+        }
+        return results;
+      })
+      .def_property("conf_threshold",
+                    &vision::detection::YOLOv5Postprocessor::GetConfThreshold,
+                    &vision::detection::YOLOv5Postprocessor::SetConfThreshold)
+      .def_property("nms_threshold",
+                    &vision::detection::YOLOv5Postprocessor::GetNMSThreshold,
+                    &vision::detection::YOLOv5Postprocessor::SetNMSThreshold)
+      .def_property("multi_label",
+                    &vision::detection::YOLOv5Postprocessor::GetMultiLabel,
+                    &vision::detection::YOLOv5Postprocessor::SetMultiLabel);
+
   pybind11::class_<vision::detection::YOLOv5, FastDeployModel>(m, "YOLOv5")
       .def(pybind11::init<std::string, std::string, RuntimeOption, ModelFormat>())
       .def("predict",
-           [](vision::detection::YOLOv5& self, pybind11::array& data,
-              float conf_threshold, float nms_threshold) {
   pybind11::class_<vision::detection::YOLOv5, FastDeployModel>(m, "YOLOv5")
       .def(pybind11::init<std::string, std::string, RuntimeOption,
                           ModelFormat>())
       .def("predict",
-           [](vision::detection::YOLOv5& self, pybind11::array& data,
-              float conf_threshold, float nms_threshold) {
+           [](vision::detection::YOLOv5& self, pybind11::array& data) {
             auto mat = PyArrayToCvMat(data);
             vision::DetectionResult res;
-            self.Predict(&mat, &res, conf_threshold, nms_threshold);
+            self.Predict(mat, &res);
             return res;
           })
-      .def("use_cuda_preprocessing",
-           [](vision::detection::YOLOv5& self, int max_image_size) {
-             self.UseCudaPreprocessing(max_image_size);
-           })
-      .def_static("preprocess",
-                  [](pybind11::array& data, const std::vector<int>& size,
-                     const std::vector<float> padding_value, bool is_mini_pad,
-                     bool is_no_pad, bool is_scale_up, int stride, float max_wh,
-                     bool multi_label) {
-                    auto mat = PyArrayToCvMat(data);
-                    fastdeploy::vision::Mat fd_mat(mat);
-                    FDTensor output;
-                    std::map<std::string, std::array<float, 2>> im_info;
-                    vision::detection::YOLOv5::Preprocess(
-                        &fd_mat, &output, &im_info, size, padding_value,
-                        is_mini_pad, is_no_pad, is_scale_up, stride, max_wh,
-                        multi_label);
-                    return make_pair(TensorToPyArray(output), im_info);
-                  })
-      .def_static(
-          "postprocess",
-          [](std::vector<pybind11::array> infer_results,
-             const std::map<std::string, std::array<float, 2>>& im_info,
-             float conf_threshold, float nms_iou_threshold, bool multi_label,
-             float max_wh) {
-            std::vector<FDTensor> fd_infer_results(infer_results.size());
-            PyArrayToTensorList(infer_results, &fd_infer_results, true);
-            vision::DetectionResult result;
-            vision::detection::YOLOv5::Postprocess(
-                fd_infer_results, &result, im_info, conf_threshold,
-                nms_iou_threshold, multi_label, max_wh);
-            return result;
-          })
-      .def_readwrite("size", &vision::detection::YOLOv5::size_)
-      .def_readwrite("padding_value",
-                     &vision::detection::YOLOv5::padding_value_)
-      .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad_)
-      .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad_)
-      .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up_)
-      .def_readwrite("stride", &vision::detection::YOLOv5::stride_)
-      .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh_)
-      .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label_);
+      .def("batch_predict", [](vision::detection::YOLOv5& self,
+                               std::vector<pybind11::array>& data) {
+        std::vector<cv::Mat> images;
+        for (size_t i = 0; i < data.size(); ++i) {
+          images.push_back(PyArrayToCvMat(data[i]));
+        }
+        std::vector<vision::DetectionResult> results;
+        self.BatchPredict(images, &results);
+        return results;
+      })
+      .def_property_readonly("preprocessor",
+                             &vision::detection::YOLOv5::GetPreprocessor)
+      .def_property_readonly("postprocessor",
+                             &vision::detection::YOLOv5::GetPostprocessor);
 }
 } // namespace fastdeploy
diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py
index 5ecef307bc8..e0b5138becf 100644
--- a/python/fastdeploy/vision/detection/contrib/yolov5.py
+++ b/python/fastdeploy/vision/detection/contrib/yolov5.py
@@ -18,109 +18,38 @@
 from .... import c_lib_wrap as C
 
-class YOLOv5(FastDeployModel):
-    def __init__(self,
-                 model_file,
-                 params_file="",
-                 runtime_option=None,
-                 model_format=ModelFormat.ONNX):
-        """Load a YOLOv5 model exported by YOLOv5.
- - :param model_file: (str)Path of model file, e.g ./yolov5.onnx - :param params_file: (str)Path of parameters file, e.g yolox/model.pdiparams, if the model_fomat is ModelFormat.ONNX, this param will be ignored, can be set as empty string - :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU - :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model +class YOLOv5Preprocessor: + def __init__(self): + """Create a preprocessor for YOLOv5 """ - # 调用基函数进行backend_option的初始化 - # 初始化后的option保存在self._runtime_option - super(YOLOv5, self).__init__(runtime_option) + self._preprocessor = C.vision.detection.YOLOv5Preprocessor() - self._model = C.vision.detection.YOLOv5( - model_file, params_file, self._runtime_option, model_format) - # 通过self.initialized判断整个模型的初始化是否成功 - assert self.initialized, "YOLOv5 initialize failed." + def run(self, input_ims): + """Preprocess input images for YOLOv5 - def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): - """Detect an input image + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) - :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format - :param conf_threshold: confidence threashold for postprocessing, default is 0.25 - :param nms_iou_threshold: iou threashold for NMS, default is 0.5 - :return: DetectionResult + def use_cuda_preprocessing(self, max_image_size): + """Preprocess input images by CUDA + + :param: max_image_size: (int)Set max_image_size """ - return self._model.predict(input_image, conf_threshold, - nms_iou_threshold) - - @staticmethod - def preprocess(input_image, - size=[640, 640], - padding_value=[114.0, 114.0, 114.0], - is_mini_pad=False, - is_no_pad=False, - is_scale_up=False, - stride=32, - max_wh=7680.0, - multi_label=True): - return C.vision.detection.YOLOv5.preprocess( - input_image, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride, max_wh, multi_label) - - @staticmethod - def postprocess(infer_result, - im_info, - conf_threshold=0.25, - nms_iou_threshold=0.5, - multi_label=True, - max_wh=7680.0): - return C.vision.detection.YOLOv5.postprocess( - infer_result, im_info, conf_threshold, nms_iou_threshold, - multi_label, max_wh) - - # 一些跟YOLOv5模型有关的属性封装 - # 多数是预处理相关,可通过修改如model.size = [1280, 1280]改变预处理时resize的大小(前提是模型支持) + return self._preprocessor.use_cuda_preprocessing(max_image_size) + @property def size(self): """ Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] """ - return self._model.size + return self._preprocessor.size @property def padding_value(self): # padding value, size should be the same as channels - return self._model.padding_value - - @property - def is_no_pad(self): - # while is_mini_pad = false and is_no_pad = true, will resize the image to the set size - return self._model.is_no_pad - - @property - def is_mini_pad(self): - # only pad to the minimum rectange which height and width is times of stride - return self._model.is_mini_pad - - @property - def is_scale_up(self): - # if is_scale_up is false, the input image only can be zoom out, the maximum resize scale cannot exceed 1.0 - return self._model.is_scale_up - - @property - def stride(self): - # padding stride, for is_mini_pad - return self._model.stride - - @property - def max_wh(self): - # for offseting the boxes by classes when using 
NMS - return self._model.max_wh - - @property - def multi_label(self): - """ - Argument for image preprocessing step, for different strategies to get boxes when postprocessing, default True - """ - return self._model.multi_label + return self._preprocessor.padding_value @size.setter def size(self, wh): @@ -129,50 +58,129 @@ def size(self, wh): assert len(wh) == 2,\ "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format( len(wh)) - self._model.size = wh + self._preprocessor.size = wh @padding_value.setter def padding_value(self, value): assert isinstance( value, list), "The value to set `padding_value` must be type of list." - self._model.padding_value = value + self._preprocessor.padding_value = value - @is_no_pad.setter - def is_no_pad(self, value): - assert isinstance( - value, bool), "The value to set `is_no_pad` must be type of bool." - self._model.is_no_pad = value - @is_mini_pad.setter - def is_mini_pad(self, value): - assert isinstance( - value, - bool), "The value to set `is_mini_pad` must be type of bool." - self._model.is_mini_pad = value +class YOLOv5Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv5 + """ + self._postprocessor = C.vision.detection.YOLOv5Postprocessor() - @is_scale_up.setter - def is_scale_up(self, value): - assert isinstance( - value, - bool), "The value to set `is_scale_up` must be type of bool." - self._model.is_scale_up = value + def run(self, runtime_results, im_info): + """Postprocess the runtime results for YOLOv5 - @stride.setter - def stride(self, value): - assert isinstance( - value, int), "The value to set `stride` must be type of int." - self._model.stride = value + :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime + :param: im_info: (dict)Record input_shape and output_shape + :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results, im_info) - @max_wh.setter - def max_wh(self, value): - assert isinstance( - value, float), "The value to set `max_wh` must be type of float." - self._model.max_wh = value + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @property + def multi_label(self): + """ + multi_label for postprocessing, default is true + """ + return self._postprocessor.multi_label + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance(conf_threshold, float),\ + "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance(nms_threshold, float),\ + "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold @multi_label.setter def multi_label(self, value): assert isinstance( value, bool), "The value to set `multi_label` must be type of bool." - self._model.multi_label = value + self._postprocessor.multi_label = value + + +class YOLOv5(FastDeployModel): + def __init__(self, + model_file, + params_file="", + runtime_option=None, + model_format=ModelFormat.ONNX): + """Load a YOLOv5 model exported by YOLOv5. 
+
+        :param model_file: (str)Path of the model file, e.g. ./yolov5.onnx
+        :param params_file: (str)Path of the parameters file, e.g. yolov5/model.pdiparams; if the model_format is ModelFormat.ONNX, this param will be ignored and can be set as an empty string
+        :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inferencing this model; if it's None, the default backend on CPU will be used
+        :param model_format: (fastdeploy.ModelFormat)Model format of the loaded model
+        """
+        # Call the base class to initialize the backend option;
+        # the initialized option is stored in self._runtime_option
+        super(YOLOv5, self).__init__(runtime_option)
+
+        assert model_format == ModelFormat.ONNX, "YOLOv5 only supports model format of ModelFormat.ONNX now."
+        self._model = C.vision.detection.YOLOv5(
+            model_file, params_file, self._runtime_option, model_format)
+        # self.initialized indicates whether the whole model initialized successfully
+        assert self.initialized, "YOLOv5 initialize failed."
+
+    def predict(self, input_image, conf_threshold=0.25, nms_threshold=0.5):
+        """Detect an input image
+
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :param conf_threshold: confidence threshold for postprocessing, default is 0.25
+        :param nms_threshold: iou threshold for NMS, default is 0.5
+        :return: DetectionResult
+        """
+
+        self.postprocessor.conf_threshold = conf_threshold
+        self.postprocessor.nms_threshold = nms_threshold
+        return self._model.predict(input_image)
+
+    def batch_predict(self, images):
+        """Detect a batch of input images
+
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return list of DetectionResult
+        """
+
+        return self._model.batch_predict(images)
+
+    @property
+    def preprocessor(self):
+        """Get the YOLOv5Preprocessor object of the loaded model
+
+        :return YOLOv5Preprocessor
+        """
+        return self._model.preprocessor
+
+    @property
+    def postprocessor(self):
+        """Get the YOLOv5Postprocessor object of the loaded model
+
+        :return YOLOv5Postprocessor
+        """
+        return self._model.postprocessor

From 92dc3522372e3065b4aa551a27622aabc14cd4dd Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Thu, 10 Nov 2022 09:14:32 +0000
Subject: [PATCH 20/50] rm origin yolov5

---
 fastdeploy/vision/detection/contrib/yolov5.cc | 384 ------------------
 fastdeploy/vision/detection/contrib/yolov5.h  | 136 -------
 .../detection/contrib/yolov5/postprocessor.cc |   1 -
 .../detection/contrib/yolov5/postprocessor.h  |   7 +-
 .../detection/contrib/yolov5/preprocessor.cc  |   3 -
 .../detection/contrib/yolov5/preprocessor.h   |   7 +-
 .../vision/detection/contrib/yolov5_pybind.cc |  73 ----
 7 files changed, 9 insertions(+), 602 deletions(-)
 delete mode 100755 fastdeploy/vision/detection/contrib/yolov5.cc
 delete mode 100644 fastdeploy/vision/detection/contrib/yolov5.h
 delete mode 100644 fastdeploy/vision/detection/contrib/yolov5_pybind.cc
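With the monolithic class removed below, thresholds and preprocessing options hang off the member processors instead of being passed to every call. A minimal C++ sketch of the replacement flow (assuming `GetPostprocessor()` returns a mutable reference, as the `def_property_readonly` binding suggests; paths are placeholders):

```cpp
// Sketch: the decomposed API that replaces the deleted monolithic class.
// BatchPredict reads thresholds from the postprocessor, so they are set
// up front instead of being passed per call.
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::vision::detection::YOLOv5 model("yolov5s.onnx");
  if (!model.Initialized()) return -1;

  auto& post = model.GetPostprocessor();  // assumed to return a reference
  post.SetConfThreshold(0.3f);
  post.SetNMSThreshold(0.45f);

  std::vector<cv::Mat> images = {cv::imread("test.jpg")};
  std::vector<fastdeploy::vision::DetectionResult> results;
  if (!model.BatchPredict(images, &results)) return -1;
  return 0;
}
```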
diff --git a/fastdeploy/vision/detection/contrib/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5.cc
deleted file mode 100755
index ba5b22363ec..00000000000
--- a/fastdeploy/vision/detection/contrib/yolov5.cc
+++ /dev/null
@@ -1,384 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/yolov5.h"
-
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-#ifdef ENABLE_CUDA_PREPROCESS
-#include "fastdeploy/vision/utils/cuda_utils.h"
-#endif  // ENABLE_CUDA_PREPROCESS
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-void YOLOv5::LetterBox(Mat* mat, std::vector<int> size,
-                       std::vector<float> color, bool _auto, bool scale_fill,
-                       bool scale_up, int stride) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  Resize::Run(mat, resize_w, resize_h);
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file,
-               const RuntimeOption& custom_option,
-               const ModelFormat& model_format) {
-  if (model_format == ModelFormat::ONNX) {
-    valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-#ifdef ENABLE_CUDA_PREPROCESS
-  cudaSetDevice(runtime_option.device_id);
-  cudaStream_t stream;
-  CUDA_CHECK(cudaStreamCreate(&stream));
-  cuda_stream_ = reinterpret_cast<void*>(stream);
-  runtime_option.SetExternalStream(cuda_stream_);
-#endif  // ENABLE_CUDA_PREPROCESS
-  initialized = Initialize();
-}
-
-bool YOLOv5::Initialize() {
-  // parameters for preprocess
-  size_ = {640, 640};
-  padding_value_ = {114.0, 114.0, 114.0};
-  is_mini_pad_ = false;
-  is_no_pad_ = false;
-  is_scale_up_ = false;
-  stride_ = 32;
-  max_wh_ = 7680.0;
-  multi_label_ = true;
-  reused_input_tensors_.resize(1);
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
- // TODO(qiuyanjun): remove - // is_dynamic_input_ = false; - // auto shape = InputInfoOfRuntime(0).shape; - // for (int i = 0; i < shape.size(); ++i) { - // // if height or width is dynamic - // if (i >= 2 && shape[i] <= 0) { - // is_dynamic_input_ = true; - // break; - // } - // } - // if (!is_dynamic_input_) { - // is_mini_pad_ = false; - // } - - return true; -} - -YOLOv5::~YOLOv5() { -#ifdef ENABLE_CUDA_PREPROCESS - if (use_cuda_preprocessing_) { - CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); - CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); - CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); - CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); - } -#endif // ENABLE_CUDA_PREPROCESS -} - -bool YOLOv5::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info, - const std::vector& size, - const std::vector padding_value, - bool is_mini_pad, bool is_no_pad, bool is_scale_up, - int stride, float max_wh, bool multi_label) { - // Record the shape of image and the shape of preprocessed image - (*im_info)["input_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - // process after image load - double ratio = (size[0] * 1.0) / std::max(static_cast(mat->Height()), - static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov5's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, - stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -void YOLOv5::UseCudaPreprocessing(int max_image_size) { -#ifdef ENABLE_CUDA_PREPROCESS - use_cuda_preprocessing_ = true; - is_scale_up_ = true; - if (input_img_cuda_buffer_host_ == nullptr) { - // prepare input data cache in GPU pinned memory - CUDA_CHECK(cudaMallocHost((void**)&input_img_cuda_buffer_host_, - max_image_size * 3)); - // prepare input data cache in GPU device memory - CUDA_CHECK( - cudaMalloc((void**)&input_img_cuda_buffer_device_, max_image_size * 3)); - CUDA_CHECK(cudaMalloc((void**)&input_tensor_cuda_buffer_device_, - 3 * size_[0] * size_[1] * sizeof(float))); - } -#else - FDWARNING << "The FastDeploy didn't compile with BUILD_CUDA_SRC=ON." 
- << std::endl; - use_cuda_preprocessing_ = false; -#endif -} - -bool YOLOv5::CudaPreprocess( - Mat* mat, FDTensor* output, - std::map>* im_info, - const std::vector& size, const std::vector padding_value, - bool is_mini_pad, bool is_no_pad, bool is_scale_up, int stride, - float max_wh, bool multi_label) { -#ifdef ENABLE_CUDA_PREPROCESS - if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { - FDERROR << "Preprocessing with CUDA is only available when the arguments " - "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." - << std::endl; - return false; - } - - // Record the shape of image and the shape of preprocessed image - (*im_info)["input_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - cudaStream_t stream = reinterpret_cast(cuda_stream_); - int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); - memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); - CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, - input_img_cuda_buffer_host_, src_img_buf_size, - cudaMemcpyHostToDevice, stream)); - utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), - mat->Height(), input_tensor_cuda_buffer_device_, - size[0], size[1], padding_value, stream); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(size[0]), - static_cast(size[1])}; - - output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, - input_tensor_cuda_buffer_device_); - output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -#else - FDERROR << "CUDA src code was not enabled." << std::endl; - return false; -#endif // ENABLE_CUDA_PREPROCESS -} - -bool YOLOv5::Postprocess( - std::vector& infer_results, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold, bool multi_label, - float max_wh) { - auto& infer_result = infer_results[0]; - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - if (multi_label) { - result->Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5)); - } else { - result->Reserve(infer_result.shape[1]); - } - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." 
<< std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - if (multi_label) { - for (size_t j = 5; j < infer_result.shape[2]; ++j) { - confidence = data[s + 4]; - float* class_score = data + s + j; - confidence *= (*class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, class_score); - - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - } else { - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - } - - if (result->boxes.size() == 0) { - return true; - } - - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - for (size_t i = 0; i < result->boxes.size(); ++i) { - float pad_h = (out_h - ipt_h * scale) / 2; - float pad_w = (out_w - ipt_w * scale) / 2; - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h); - } - return true; -} - -bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { - Mat mat(*im); - - std::map> im_info; - - if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info, size_, - padding_value_, is_mini_pad_, 
is_no_pad_, is_scale_up_, - stride_, max_wh_, multi_label_)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - } else { - if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info, size_, - padding_value_, is_mini_pad_, is_no_pad_, is_scale_up_, - stride_, max_wh_, multi_label_)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - } - - reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; - if (!Infer()) { - FDERROR << "Failed to inference." << std::endl; - return false; - } - - if (!Postprocess(reused_output_tensors_, result, im_info, conf_threshold, - nms_iou_threshold, multi_label_)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5.h deleted file mode 100644 index 4c46acd0a21..00000000000 --- a/fastdeploy/vision/detection/contrib/yolov5.h +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { -/*! @brief YOLOv5 model object used when to load a YOLOv5 model exported by YOLOv5. - */ -class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { - public: - /** \brief Set path of model file and the configuration of runtime. 
- * - * \param[in] model_file Path of model file, e.g ./yolov5.onnx - * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored - * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends" - * \param[in] model_format Model format of the loaded model, default is ONNX format - */ - YOLOv5(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::ONNX); - - ~YOLOv5(); - - std::string ModelName() const { return "yolov5"; } - /** \brief Predict the detection result for an input image - * - * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format - * \param[in] result The output detection result will be writen to this structure - * \param[in] conf_threshold confidence threashold for postprocessing, default is 0.25 - * \param[in] nms_iou_threshold iou threashold for NMS, default is 0.5 - * \return true if the prediction successed, otherwise false - */ - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - static bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info, - const std::vector& size = {640, 640}, - const std::vector padding_value = {114.0, 114.0, - 114.0}, - bool is_mini_pad = false, bool is_no_pad = false, - bool is_scale_up = false, int stride = 32, - float max_wh = 7680.0, bool multi_label = true); - - void UseCudaPreprocessing(int max_img_size = 3840 * 2160); - - bool CudaPreprocess(Mat* mat, FDTensor* output, - std::map>* im_info, - const std::vector& size = {640, 640}, - const std::vector padding_value = {114.0, 114.0, - 114.0}, - bool is_mini_pad = false, bool is_no_pad = false, - bool is_scale_up = false, int stride = 32, - float max_wh = 7680.0, bool multi_label = true); - - static bool Postprocess( - std::vector& infer_results, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold, bool multi_label, - float max_wh = 7680.0); - - /*! @brief - Argument for image preprocessing step, tuple of (width, height), decide the target size after resize, default size = {640, 640} - */ - std::vector size_; - // padding value, size should be the same as channels - - std::vector padding_value_; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad_; - // while is_mini_pad = false and is_no_pad = true, - // will resize the image to the set size - bool is_no_pad_; - // if is_scale_up is false, the input image only can be zoom out, - // the maximum resize scale cannot exceed 1.0 - bool is_scale_up_; - // padding stride, for is_mini_pad - int stride_; - // for offseting the boxes by classes when using NMS - float max_wh_; - /*! @brief - Argument for image preprocessing step, for different strategies to get boxes when postprocessing, default true - */ - bool multi_label_; - - private: - bool Initialize(); - - bool IsDynamicInput() const { return is_dynamic_input_; } - - static void LetterBox(Mat* mat, std::vector size, - std::vector color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // YOLOv5 official 'export_onnx.py' script will export dynamic ONNX by - // default. 
- // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; - // CUDA host buffer for input image - uint8_t* input_img_cuda_buffer_host_ = nullptr; - // CUDA device buffer for input image - uint8_t* input_img_cuda_buffer_device_ = nullptr; - // CUDA device buffer for TRT input tensor - float* input_tensor_cuda_buffer_device_ = nullptr; - // Whether to use CUDA preprocessing - bool use_cuda_preprocessing_ = false; - // CUDA stream - void* cuda_stream_ = nullptr; -}; - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index efc5157c117..d093abb1826 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "fastdeploy/vision/classification/ppcls/postprocessor.h" -#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index c40624820b6..166d956c2d7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -15,6 +15,7 @@ #pragma once #include "fastdeploy/vision/common/processors/transform.h" #include "fastdeploy/vision/common/result.h" +#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { @@ -45,7 +46,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { } /// Get conf_threshold, default 0.25 - void GetConfThreshold() const { return conf_threshold_; } + float GetConfThreshold() const { return conf_threshold_; } /// Set nms_threshold, default 0.5 void SetNMSThreshold(float nms_threshold) { @@ -53,7 +54,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { } /// Get nms_threshold, default 0.5 - void GetNMSThreshold() const { return nms_threshold_; } + float GetNMSThreshold() const { return nms_threshold_; } /// Set multi_label, default true void SetMultiLabel(bool multi_label) { @@ -61,7 +62,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { } /// Get multi_label, default true - void GetMultiLabel() const { return multi_label_; } + bool GetMultiLabel() const { return multi_label_; } private: bool Postprocess(const std::vector& tensors, diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 61de32d2770..1c1f1595a12 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -14,9 +14,6 @@ #include "fastdeploy/vision/detection/contrib/yolov5/preprocessor.h" #include "fastdeploy/function/concat.h" -#ifdef ENABLE_CUDA_PREPROCESS -#include "fastdeploy/vision/utils/cuda_utils.h" -#endif // ENABLE_CUDA_PREPROCESS namespace fastdeploy { namespace vision { diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index b57af35909f..e1f389c5024 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -15,6 +15,9 @@ #pragma once #include "fastdeploy/vision/common/processors/transform.h" #include 
"fastdeploy/vision/common/result.h" +#ifdef ENABLE_CUDA_PREPROCESS +#include "fastdeploy/vision/utils/cuda_utils.h" +#endif // ENABLE_CUDA_PREPROCESS namespace fastdeploy { namespace vision { @@ -42,7 +45,7 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { void SetSize(std::vector size) { size_ = size; } /// Get target size, tuple of (width, height), default size = {640, 640} - void GetSize() const { return size_; } + std::vector GetSize() const { return size_; } /// Set padding value, size should be the same as channels void SetPaddingValue(std::vector padding_value) { @@ -50,7 +53,7 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { } /// Get padding value, size should be the same as channels - void GetPaddingValue() const { return padding_value_; } + std::vector GetPaddingValue() const { return padding_value_; } /// Use Cuda Preprocess void UseCudaPreprocessing(int max_img_size = 3840 * 2160); diff --git a/fastdeploy/vision/detection/contrib/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5_pybind.cc deleted file mode 100644 index 52d0d78c9b2..00000000000 --- a/fastdeploy/vision/detection/contrib/yolov5_pybind.cc +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv5(pybind11::module& m) { - pybind11::class_(m, "YOLOv5") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOv5& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def("use_cuda_preprocessing", - [](vision::detection::YOLOv5& self, int max_image_size) { - self.UseCudaPreprocessing(max_image_size); - }) - .def_static("preprocess", - [](pybind11::array& data, const std::vector& size, - const std::vector padding_value, bool is_mini_pad, - bool is_no_pad, bool is_scale_up, int stride, float max_wh, - bool multi_label) { - auto mat = PyArrayToCvMat(data); - fastdeploy::vision::Mat fd_mat(mat); - FDTensor output; - std::map> im_info; - vision::detection::YOLOv5::Preprocess( - &fd_mat, &output, &im_info, size, padding_value, - is_mini_pad, is_no_pad, is_scale_up, stride, max_wh, - multi_label); - return make_pair(TensorToPyArray(output), im_info); - }) - .def_static( - "postprocess", - [](std::vector infer_results, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold, bool multi_label, - float max_wh) { - std::vector fd_infer_results(infer_results.size()); - PyArrayToTensorList(infer_results, &fd_infer_results, true); - vision::DetectionResult result; - vision::detection::YOLOv5::Postprocess( - fd_infer_results, &result, im_info, conf_threshold, - nms_iou_threshold, multi_label, max_wh); - return result; - }) - .def_readwrite("size", &vision::detection::YOLOv5::size_) - .def_readwrite("padding_value", - &vision::detection::YOLOv5::padding_value_) - .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad_) - .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad_) - .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up_) - .def_readwrite("stride", &vision::detection::YOLOv5::stride_) - .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh_) - .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label_); -} -} // namespace fastdeploy From 983301b410220e56fdaadb48f27b71d08d2bb12e Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 09:24:20 +0000 Subject: [PATCH 21/50] fixed bugs --- .../detection/contrib/yolov5/postprocessor.cc | 56 +++++++++---------- .../vision/detection/contrib/yolov5/yolov5.cc | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index d093abb1826..5a8d9d6e2da 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "fastdeploy/vision/classification/ppcls/postprocessor.h" +#include "fastdeploy/vision/detection/contrib/yolov5/postprocessor.h" namespace fastdeploy { namespace vision { -namespace classification { +namespace detection { YOLOv5Postprocessor::YOLOv5Postprocessor() { conf_threshold_ = 0.25; @@ -31,11 +31,11 @@ bool YOLOv5Postprocessor::Postprocess( const std::map>& im_info) { auto& infer_result = infer_results[0]; for (size_t bs = 0; bs < results->size(); ++bs) { - *results[bs].Clear(); + (*results)[bs].Clear(); if (multi_label_) { - *results[bs].Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5)); + (*results)[bs].Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5)); } else { - *results[bs].Reserve(infer_result.shape[1]); + (*results)[bs].Reserve(infer_result.shape[1]); } if (infer_result.dtype != FDDataType::FP32) { FDERROR << "Only support post process with float32 data." << std::endl; @@ -57,13 +57,13 @@ bool YOLOv5Postprocessor::Postprocess( int32_t label_id = std::distance(data + s + 5, class_score); // convert from [x, y, w, h] to [x1, y1, x2, y2] - *results[bs].boxes.emplace_back(std::array{ + (*results)[bs].boxes.emplace_back(std::array{ data[s] - data[s + 2] / 2.0f + label_id * max_wh_, data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); - *results[bs].label_ids.push_back(label_id); - *results[bs].scores.push_back(confidence); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); } } else { float* max_class_score = @@ -75,21 +75,21 @@ bool YOLOv5Postprocessor::Postprocess( } int32_t label_id = std::distance(data + s + 5, max_class_score); // convert from [x, y, w, h] to [x1, y1, x2, y2] - *results[bs].boxes.emplace_back(std::array{ + (*results)[bs].boxes.emplace_back(std::array{ data[s] - data[s + 2] / 2.0f + label_id * max_wh_, data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); - *results[bs].label_ids.push_back(label_id); - *results[bs].scores.push_back(confidence); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); } } - if (*results[bs].boxes.size() == 0) { + if ((*results)[bs].boxes.size() == 0) { return true; } - utils::NMS(&(*results[bs]), nms_threshold_); + utils::NMS(&((*results)[bs]), nms_threshold_); // scale the boxes to the origin image shape auto iter_out = im_info.find("output_shape"); @@ -101,23 +101,23 @@ bool YOLOv5Postprocessor::Postprocess( float ipt_h = iter_ipt->second[0]; float ipt_w = iter_ipt->second[1]; float scale = std::min(out_h / ipt_h, out_w / ipt_w); - for (size_t i = 0; i < *results[bs].boxes.size(); ++i) { + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { float pad_h = (out_h - ipt_h * scale) / 2; float pad_w = (out_w - ipt_w * scale) / 2; - int32_t label_id = (*results[bs].label_ids)[i]; + int32_t label_id = ((*results)[bs].label_ids)[i]; // clip box - *results[bs].boxes[i][0] = *results[bs].boxes[i][0] - max_wh * label_id; - *results[bs].boxes[i][1] = *results[bs].boxes[i][1] - max_wh * label_id; - *results[bs].boxes[i][2] = *results[bs].boxes[i][2] - max_wh * label_id; - *results[bs].boxes[i][3] = *results[bs].boxes[i][3] - max_wh * label_id; - *results[bs].boxes[i][0] = std::max((*results[bs].boxes[i][0] - pad_w) / scale, 0.0f); - *results[bs].boxes[i][1] = std::max((*results[bs].boxes[i][1] - 
pad_h) / scale, 0.0f); - *results[bs].boxes[i][2] = std::max((*results[bs].boxes[i][2] - pad_w) / scale, 0.0f); - *results[bs].boxes[i][3] = std::max((*results[bs].boxes[i][3] - pad_h) / scale, 0.0f); - *results[bs].boxes[i][0] = std::min(*results[bs].boxes[i][0], ipt_w); - *results[bs].boxes[i][1] = std::min(*results[bs].boxes[i][1], ipt_h); - *results[bs].boxes[i][2] = std::min(*results[bs].boxes[i][2], ipt_w); - *results[bs].boxes[i][3] = std::min(*results[bs].boxes[i][3], ipt_h); + (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh * label_id; + (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh * label_id; + (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh * label_id; + (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh * label_id; + (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h); } } return true; @@ -139,6 +139,6 @@ bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector< return false; } return true; -} // namespace classification +} // namespace detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index e10ceb9ed2c..7d19d6418af 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
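The decode above relies on a common trick: boxes are shifted by `label_id * max_wh_` before NMS so that boxes of different classes can never overlap, which makes a single class-agnostic NMS pass behave per class; the offset is subtracted again in the clip-box step before the letterbox padding and scale are undone. A standalone toy illustration (not FastDeploy code):

```cpp
// Toy illustration of the class-offset trick used above: shifting each box
// by label_id * max_wh puts different classes on disjoint coordinate ranges,
// so one class-agnostic NMS pass behaves like per-class NMS.
#include <array>
#include <cstdio>

int main() {
  const float max_wh = 7680.0f;
  // Two identical boxes with different labels...
  std::array<float, 4> box_a = {10.f, 10.f, 50.f, 50.f};  // label 0
  std::array<float, 4> box_b = {10.f, 10.f, 50.f, 50.f};  // label 1
  // ...stop overlapping once shifted:
  for (auto& v : box_b) v += 1 * max_wh;
  // IoU(box_a, box_b) is now 0, so NMS keeps both; afterwards the offset
  // is removed exactly as in the clip-box step above.
  for (auto& v : box_b) v -= 1 * max_wh;
  std::printf("box_b restored: %.0f %.0f %.0f %.0f\n",
              box_b[0], box_b[1], box_b[2], box_b[3]);
  return 0;
}
```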
-#include "fastdeploy/vision/detection/contrib/yolov5.h" +#include "fastdeploy/vision/detection/contrib/yolov5/yolov5.h" namespace fastdeploy { namespace vision { From 27113e3bc68cee6e98888447fd2e1eb110e08bbc Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 11:31:49 +0000 Subject: [PATCH 22/50] rm cuda preprocess --- fastdeploy/vision.h | 2 +- .../detection/contrib/yolov5/postprocessor.cc | 1 + .../detection/contrib/yolov5/postprocessor.h | 1 - .../detection/contrib/yolov5/preprocessor.cc | 69 +------------------ .../detection/contrib/yolov5/preprocessor.h | 19 ----- .../vision/detection/contrib/yolov5/yolov5.cc | 18 ----- .../detection/contrib/yolov5/yolov5_pybind.cc | 4 -- .../vision/detection/contrib/yolov5.py | 7 -- 8 files changed, 3 insertions(+), 118 deletions(-) diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h index 44054ee9372..15cc1d009db 100755 --- a/fastdeploy/vision.h +++ b/fastdeploy/vision.h @@ -21,7 +21,7 @@ #include "fastdeploy/vision/detection/contrib/nanodet_plus.h" #include "fastdeploy/vision/detection/contrib/scaledyolov4.h" #include "fastdeploy/vision/detection/contrib/yolor.h" -#include "fastdeploy/vision/detection/contrib/yolov5.h" +#include "fastdeploy/vision/detection/contrib/yolov5/yolov5.h" #include "fastdeploy/vision/detection/contrib/yolov5lite.h" #include "fastdeploy/vision/detection/contrib/yolov6.h" #include "fastdeploy/vision/detection/contrib/yolov7.h" diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 5a8d9d6e2da..9156093aad7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "fastdeploy/vision/detection/contrib/yolov5/postprocessor.h" +#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index 166d956c2d7..81d82c9207c 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -15,7 +15,6 @@ #pragma once #include "fastdeploy/vision/common/processors/transform.h" #include "fastdeploy/vision/common/result.h" -#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 1c1f1595a12..031b4e152e7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -103,66 +103,6 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, return true; } -void YOLOv5Preprocessor::UseCudaPreprocessing(int max_image_size) { -#ifdef ENABLE_CUDA_PREPROCESS - use_cuda_preprocessing_ = true; - is_scale_up_ = true; - if (input_img_cuda_buffer_host_ == nullptr) { - // prepare input data cache in GPU pinned memory - CUDA_CHECK(cudaMallocHost((void**)&input_img_cuda_buffer_host_, - max_image_size * 3)); - // prepare input data cache in GPU device memory - CUDA_CHECK( - cudaMalloc((void**)&input_img_cuda_buffer_device_, max_image_size * 3)); - CUDA_CHECK(cudaMalloc((void**)&input_tensor_cuda_buffer_device_, - 3 * size_[0] * size_[1] * sizeof(float))); - } -#else - FDWARNING << "The FastDeploy didn't compile with BUILD_CUDA_SRC=ON." 
- << std::endl; - use_cuda_preprocessing_ = false; -#endif -} - -bool YOLOv5Preprocessor::CudaPreprocess(FDMat* mat, FDTensor* output, - std::map>* im_info) { -#ifdef ENABLE_CUDA_PREPROCESS - if (is_mini_pad_ != false || is_no_pad_ != false || is_scale_up_ != true) { - FDERROR << "Preprocessing with CUDA is only available when the arguments " - "satisfy (is_mini_pad_=false, is_no_pad_=false, is_scale_up_=true)." - << std::endl; - return false; - } - - // Record the shape of image and the shape of preprocessed image - (*im_info)["input_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - cudaStream_t stream = reinterpret_cast(cuda_stream_); - int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); - memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); - CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, - input_img_cuda_buffer_host_, src_img_buf_size, - cudaMemcpyHostToDevice, stream)); - utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), - mat->Height(), input_tensor_cuda_buffer_device_, - size_[0], size_[1], padding_value_, stream); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(size_[0]), - static_cast(size_[1])}; - - output->SetExternalData({mat->Channels(), size_[0], size_[1]}, FDDataType::FP32, - input_tensor_cuda_buffer_device_); - output->device = Device::GPU; - output->ExpandDim(0); // reshape to n, h, w, c - return true; -#else - FDERROR << "CUDA src code was not enabled." << std::endl; - return false; -#endif // ENABLE_CUDA_PREPROCESS -} - bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* outputs, std::map>* im_info) { if (!initialized_) { @@ -177,16 +117,9 @@ bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* // Concat all the preprocessed data to a batch tensor std::vector tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { - if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&(*images)[i], &tensors[i], im_info)) { + if (!Preprocess(&(*images)[i], &tensors[i], im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; - } - } else { - if (!Preprocess(&(*images)[i], &tensors[i], im_info)) { - FDERROR << "Failed to preprocess input image." 
<< std::endl; - return false; - } } } diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index e1f389c5024..634531681a1 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -15,9 +15,6 @@ #pragma once #include "fastdeploy/vision/common/processors/transform.h" #include "fastdeploy/vision/common/result.h" -#ifdef ENABLE_CUDA_PREPROCESS -#include "fastdeploy/vision/utils/cuda_utils.h" -#endif // ENABLE_CUDA_PREPROCESS namespace fastdeploy { namespace vision { @@ -55,16 +52,10 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { /// Get padding value, size should be the same as channels std::vector GetPaddingValue() const { return padding_value_; } - /// Use Cuda Preprocess - void UseCudaPreprocessing(int max_img_size = 3840 * 2160); - private: bool Preprocess(FDMat* mat, FDTensor* output, std::map>* im_info); - bool CudaPreprocess(FDMat* mat, FDTensor* output, - std::map>* im_info); - bool IsDynamicInput() const { return is_dynamic_input_; } void LetterBox(FDMat* mat); @@ -101,16 +92,6 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { // value will // auto check by fastdeploy after the internal Runtime already initialized. bool is_dynamic_input_; - // CUDA host buffer for input image - uint8_t* input_img_cuda_buffer_host_ = nullptr; - // CUDA device buffer for input image - uint8_t* input_img_cuda_buffer_device_ = nullptr; - // CUDA device buffer for TRT input tensor - float* input_tensor_cuda_buffer_device_ = nullptr; - // Whether to use CUDA preprocessing - bool use_cuda_preprocessing_ = false; - // CUDA stream - void* cuda_stream_ = nullptr; }; } // namespace detection diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index 7d19d6418af..cc2cf083af4 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -32,13 +32,6 @@ YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file, runtime_option.model_format = model_format; runtime_option.model_file = model_file; runtime_option.params_file = params_file; -#ifdef ENABLE_CUDA_PREPROCESS - cudaSetDevice(runtime_option.device_id); - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - cuda_stream_ = reinterpret_cast(stream); - runtime_option.SetExternalStream(cuda_stream_); -#endif // ENABLE_CUDA_PREPROCESS initialized = Initialize(); } @@ -50,17 +43,6 @@ bool YOLOv5::Initialize() { return true; } -YOLOv5::~YOLOv5() { -#ifdef ENABLE_CUDA_PREPROCESS - if (use_cuda_preprocessing_) { - CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); - CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); - CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); - CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); - } -#endif // ENABLE_CUDA_PREPROCESS -} - bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, float nms_threshold) { postprocessor_.SetConfThreshold(conf_threshold); postprocessor_.SetNMSThreshold(nms_threshold); diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index 21bfd830fd4..dc6b7d1d069 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -31,10 +31,6 @@ void BindYOLOv5(pybind11::module& m) { } return 
make_pair(outputs, im_info);
       })
-      .def("use_cuda_preprocessing",
-           [](vision::detection::YOLOv5Preprocessor& self, int max_image_size) {
-             self.UseCudaPreprocessing(max_image_size);
-           })
       .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize,
                     &vision::detection::YOLOv5Preprocessor::SetSize)
       .def_property("padding_value",
                     &vision::detection::YOLOv5Preprocessor::GetPaddingValue,
                     &vision::detection::YOLOv5Preprocessor::SetPaddingValue);
diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py
index e0b5138becf..a7fb904c952 100644
--- a/python/fastdeploy/vision/detection/contrib/yolov5.py
+++ b/python/fastdeploy/vision/detection/contrib/yolov5.py
@@ -32,13 +32,6 @@ def run(self, input_ims):
         """
         return self._preprocessor.run(input_ims)
 
-    def use_cuda_preprocessing(self, max_image_size):
-        """Preprocess input images by CUDA
-
-        :param max_image_size: (int)Set max_image_size
-        """
-        return self._preprocessor.use_cuda_preprocessing(max_image_size)
-
     @property
     def size(self):
         """

From f7d9d20d16dd11c16404613e4b28e82279e8bc6b Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Thu, 10 Nov 2022 11:42:32 +0000
Subject: [PATCH 23/50] fixed bugs

---
 .../detection/contrib/yolov5/postprocessor.cc | 29 ++++++++++---------
 .../vision/detection/contrib/yolov5/yolov5.cc |  2 +-
 2 files changed, 16 insertions(+), 15 deletions(-)
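The first hunk of this commit fixes an operator-precedence bug: `*results[bs]` parses as `*(results[bs])` because `operator[]` binds tighter than unary `*`, so the earlier code indexed the pointer itself instead of the pointed-to vector. A standalone illustration with toy types (not FastDeploy code):

```cpp
// Toy illustration of the precedence bug fixed in this commit: [] binds
// tighter than unary *, so *results[bs] dereferences the wrong thing.
#include <cstdio>
#include <vector>

int main() {
  std::vector<std::vector<int>> storage = {{1}, {2}, {3}};
  std::vector<std::vector<int>>* results = &storage;
  size_t bs = 1;
  // *results[bs] would mean *(results[bs]): it treats `results` as an array
  // of vectors and indexes past the single object -- undefined behavior.
  // The fix is to dereference first, then index:
  (*results)[bs].push_back(42);
  std::printf("%d %d\n", (*results)[bs][0], (*results)[bs][1]);  // prints: 2 42
  return 0;
}
```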
<< std::endl; return false; } - float* data = static_cast(infer_result.Data()) + bs * infer_result.shape[1] * infer_result.shape[2]; - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; + float* data = reinterpret_cast(infer_results[0].Data()) + bs * infer_results[0].shape[1] * infer_results[0].shape[2]; + for (size_t i = 0; i < infer_results[0].shape[1]; ++i) { + int s = i * infer_results[0].shape[2]; float confidence = data[s + 4]; if (multi_label_) { - for (size_t j = 5; j < infer_result.shape[2]; ++j) { + for (size_t j = 5; j < infer_results[0].shape[2]; ++j) { confidence = data[s + 4]; float* class_score = data + s + j; confidence *= (*class_score); @@ -68,7 +67,7 @@ bool YOLOv5Postprocessor::Postprocess( } } else { float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); + std::max_element(data + s + 5, data + s + infer_results[0].shape[2]); confidence *= (*max_class_score); // filter boxes by conf_threshold if (confidence <= conf_threshold_) { @@ -107,10 +106,10 @@ bool YOLOv5Postprocessor::Postprocess( float pad_w = (out_w - ipt_w * scale) / 2; int32_t label_id = ((*results)[bs].label_ids)[i]; // clip box - (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh * label_id; - (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh * label_id; - (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh * label_id; - (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh * label_id; + (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh_ * label_id; (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); (*results)[bs].boxes[i][2] = std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); @@ -140,6 +139,8 @@ bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector< return false; } return true; +} + } // namespace detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index cc2cf083af4..b0df6b4e955 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -76,7 +76,7 @@ bool YOLOv5::BatchPredict(const std::vector& images, std::vector Date: Thu, 10 Nov 2022 11:45:57 +0000 Subject: [PATCH 24/50] fixed bugs --- fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 9104565a375..5f0cb9a6f7c 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -41,14 +41,14 @@ bool YOLOv5Postprocessor::Postprocess( FDERROR << "Only support post process with float32 data." 
<< std::endl; return false; } - float* data = reinterpret_cast(infer_results[0].Data()) + bs * infer_results[0].shape[1] * infer_results[0].shape[2]; + const float* data = reinterpret_cast(infer_results[0].Data()) + bs * infer_results[0].shape[1] * infer_results[0].shape[2]; for (size_t i = 0; i < infer_results[0].shape[1]; ++i) { int s = i * infer_results[0].shape[2]; float confidence = data[s + 4]; if (multi_label_) { for (size_t j = 5; j < infer_results[0].shape[2]; ++j) { confidence = data[s + 4]; - float* class_score = data + s + j; + const float* class_score = data + s + j; confidence *= (*class_score); // filter boxes by conf_threshold if (confidence <= conf_threshold_) { @@ -66,7 +66,7 @@ bool YOLOv5Postprocessor::Postprocess( (*results)[bs].scores.push_back(confidence); } } else { - float* max_class_score = + const float* max_class_score = std::max_element(data + s + 5, data + s + infer_results[0].shape[2]); confidence *= (*max_class_score); // filter boxes by conf_threshold From 0a5611b8121ace5073132ef5fe2dfbb3d382c7c9 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 11:49:36 +0000 Subject: [PATCH 25/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index dc6b7d1d069..9cd2824d9ea 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -50,7 +50,7 @@ void BindYOLOv5(pybind11::module& m) { std::vector results; std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); - if (!self.Run(inputs, &results)) { + if (!self.Run(inputs, &results, im_info)) { pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')"); } return results; From 5706ff621d25dfdf0bf6be3bc4ef6e05b10b0e2c Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 12:01:20 +0000 Subject: [PATCH 26/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/yolov5.h | 4 ++-- python/fastdeploy/vision/detection/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 python/fastdeploy/vision/detection/__init__.py diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h index 78621398881..09de9c45ee2 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h @@ -40,7 +40,7 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { std::string ModelName() const { return "yolov5"; } - /** \brief Predict the detection result for an input image + /** \brief DEPRECATED Predict the detection result for an input image, remove at 1.0 version * * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format * \param[in] result The output detection result will be writen to this structure @@ -63,7 +63,7 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { /** \brief Predict the detection results for a batch of input images * * \param[in] imgs, The input image list, each element comes from cv::imread() - * \param[in] results The output classification result list + * \param[in] results The output detection result list * \return true if the prediction successed, otherwise false */ virtual bool BatchPredict(const std::vector& imgs, diff 
--git a/python/fastdeploy/vision/detection/__init__.py b/python/fastdeploy/vision/detection/__init__.py old mode 100644 new mode 100755 index a4fe4c035be..47d175af755 --- a/python/fastdeploy/vision/detection/__init__.py +++ b/python/fastdeploy/vision/detection/__init__.py @@ -18,7 +18,7 @@ from .contrib.scaled_yolov4 import ScaledYOLOv4 from .contrib.nanodet_plus import NanoDetPlus from .contrib.yolox import YOLOX -from .contrib.yolov5 import YOLOv5 +from .contrib.yolov5 import * from .contrib.yolov5lite import YOLOv5Lite from .contrib.yolov6 import YOLOv6 from .contrib.yolov7end2end_trt import YOLOv7End2EndTRT From 58175642f98db91e75e94fd6cc2bda92034eacb0 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 12:25:25 +0000 Subject: [PATCH 27/50] fix pybind --- fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc | 2 +- fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 5f0cb9a6f7c..de6a8de6762 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -23,8 +23,8 @@ YOLOv5Postprocessor::YOLOv5Postprocessor() { conf_threshold_ = 0.25; nms_threshold_ = 0.5; multi_label_ = true; - initialized_ = true; max_wh_ = 7680.0; + initialized_ = true; } bool YOLOv5Postprocessor::Postprocess( diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index 9cd2824d9ea..d9758e07ebc 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -18,7 +18,7 @@ namespace fastdeploy { void BindYOLOv5(pybind11::module& m) { pybind11::class_( m, "YOLOv5Preprocessor") - .def(pybind11::init()) + .def(pybind11::init<>()) .def("run", [](vision::detection::YOLOv5Preprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { @@ -36,7 +36,7 @@ void BindYOLOv5(pybind11::module& m) { pybind11::class_( m, "YOLOv5Postprocessor") - .def(pybind11::init()) + .def(pybind11::init<>()) .def("run", [](vision::detection::YOLOv5Postprocessor& self, std::vector& inputs, const std::map>& im_info) { std::vector results; From 3e94507fef0b117509ac8b6df4fc5c763e8a824e Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 10 Nov 2022 12:54:54 +0000 Subject: [PATCH 28/50] rm useless code --- fastdeploy/vision/detection/contrib/yolov5/yolov5.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h index 09de9c45ee2..53bcfce755d 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.h +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.h @@ -36,8 +36,6 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - ~YOLOv5(); - std::string ModelName() const { return "yolov5"; } /** \brief DEPRECATED Predict the detection result for an input image, remove at 1.0 version From b97542fe1f38d7725268f3aa29eec0bbb2ea388d Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sun, 13 Nov 2022 05:31:57 +0000 Subject: [PATCH 29/50] add convert_and_permute --- .../common/processors/convert_and_permute.cc | 94 +++++++++++++++++++ 
 .../common/processors/convert_and_permute.h   |  66 ++++++++++++++
 .../vision/common/processors/transform.h      |   1 +
 .../detection/contrib/yolov5/postprocessor.cc |  51 ++++------
 .../detection/contrib/yolov5/postprocessor.h  |  13 +--
 .../detection/contrib/yolov5/preprocessor.cc  |  10 +-
 .../detection/contrib/yolov5/preprocessor.h   |  18 +---
 7 files changed, 190 insertions(+), 63 deletions(-)
 create mode 100644 fastdeploy/vision/common/processors/convert_and_permute.cc
 create mode 100644 fastdeploy/vision/common/processors/convert_and_permute.h
 mode change 100644 => 100755 fastdeploy/vision/common/processors/transform.h

diff --git a/fastdeploy/vision/common/processors/convert_and_permute.cc b/fastdeploy/vision/common/processors/convert_and_permute.cc
new file mode 100644
index 00000000000..042e9a13962
--- /dev/null
+++ b/fastdeploy/vision/common/processors/convert_and_permute.cc
@@ -0,0 +1,94 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/common/processors/convert_and_permute.h"
+
+namespace fastdeploy {
+namespace vision {
+
+ConvertAndPermute::ConvertAndPermute(const std::vector<float>& alpha,
+                                     const std::vector<float>& beta,
+                                     bool swap_rb) {
+  FDASSERT(alpha.size() == beta.size(),
+           "Convert: requires the size of alpha equal to the size of beta.");
+  FDASSERT(alpha.size() > 0 && beta.size() > 0,
+           "Convert: requires the size of alpha and beta > 0.");
+  alpha_.assign(alpha.begin(), alpha.end());
+  beta_.assign(beta.begin(), beta.end());
+  swap_rb_ = swap_rb;
+}
+
+bool ConvertAndPermute::ImplByOpenCV(FDMat* mat) {
+  cv::Mat* im = mat->GetOpenCVMat();
+  int origin_w = im->cols;
+  int origin_h = im->rows;
+  std::vector<cv::Mat> split_im;
+  cv::split(*im, split_im);
+  if (swap_rb_) std::swap(split_im[0], split_im[2]);
+  for (int c = 0; c < im->channels(); c++) {
+    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
+  }
+  cv::Mat res(origin_h, origin_w, CV_32FC(im->channels()));
+  for (int i = 0; i < im->channels(); ++i) {
+    cv::extractChannel(split_im[i],
+                       cv::Mat(origin_h, origin_w, CV_32FC1,
+                               res.ptr() + i * origin_h * origin_w * 4),
+                       0);
+  }
+
+  mat->SetMat(res);
+  mat->layout = Layout::CHW;
+  return true;
+}
+
+#ifdef ENABLE_FLYCV
+bool ConvertAndPermute::ImplByFlyCV(FDMat* mat) {
+  if (mat->layout != Layout::HWC) {
+    FDERROR << "Only supports input with HWC layout." << std::endl;
+    return false;
+  }
+  fcv::Mat* im = mat->GetFlyCVMat();
+  if (im->channels() != 3) {
+    FDERROR << "Only supports 3-channels image in FlyCV, but now it's "
+            << im->channels() << "." << std::endl;
+    return false;
+  }
+  std::vector<float> mean(3, 0);
+  std::vector<float> std(3, 0);
+  for (size_t i = 0; i < 3; ++i) {
+    std[i] = 1.0 / alpha_[i];
+    mean[i] = -1 * beta_[i] * std[i];
+  }
+
+  std::vector<uint32_t> channel_reorder_index = {0, 1, 2};
+  if (swap_rb_) std::swap(channel_reorder_index[0], channel_reorder_index[2]);
+
+  fcv::Mat new_im;
+  fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index,
+                                       new_im, false);
+  mat->SetMat(new_im);
+  mat->layout = Layout::CHW;
+  return true;
+}
+#endif
+
+bool ConvertAndPermute::Run(FDMat* mat, const std::vector<float>& alpha,
+                            const std::vector<float>& beta, bool swap_rb,
+                            ProcLib lib) {
+  auto n = ConvertAndPermute(alpha, beta, swap_rb);
+  return n(mat, lib);
+}
+
+}  // namespace vision
+}  // namespace fastdeploy
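For orientation between the two new files: this processor fuses what the YOLOv5 preprocessor previously did in four steps (BGR2RGB, Convert, HWC2CHW, Cast) into a single `swap_rb -> x * alpha + beta -> CHW` pass. A minimal standalone sketch of calling it (not part of the patch; the `mat.h` include path and the single-image `WrapMat` helper are assumptions inferred from surrounding code):

```cpp
// Illustrative sketch only, against the ConvertAndPermute API added above.
#include <opencv2/opencv.hpp>
#include <vector>

#include "fastdeploy/vision/common/mat.h"  // assumed include path for FDMat/WrapMat
#include "fastdeploy/vision/common/processors/convert_and_permute.h"

int main() {
  cv::Mat bgr = cv::imread("test.jpg");
  fastdeploy::vision::FDMat mat = fastdeploy::vision::WrapMat(bgr);
  // Scale to [0, 1], swap BGR -> RGB, and emit float CHW in one pass;
  // equivalent to BGR2RGB::Run + Convert::Run + HWC2CHW::Run + Cast::Run.
  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
  bool ok = fastdeploy::vision::ConvertAndPermute::Run(&mat, alpha, beta,
                                                       /*swap_rb=*/true);
  return ok ? 0 : 1;
}
```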
diff --git a/fastdeploy/vision/common/processors/convert_and_permute.h b/fastdeploy/vision/common/processors/convert_and_permute.h
new file mode 100644
index 00000000000..d4fc5da1213
--- /dev/null
+++ b/fastdeploy/vision/common/processors/convert_and_permute.h
@@ -0,0 +1,66 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/vision/common/processors/base.h"
+
+namespace fastdeploy {
+namespace vision {
+class FASTDEPLOY_DECL ConvertAndPermute : public Processor {
+ public:
+  ConvertAndPermute(const std::vector<float>& alpha = std::vector<float>(),
+                    const std::vector<float>& beta = std::vector<float>(),
+                    bool swap_rb = false);
+  bool ImplByOpenCV(FDMat* mat);
+#ifdef ENABLE_FLYCV
+  bool ImplByFlyCV(FDMat* mat);
+#endif
+  std::string Name() { return "ConvertAndPermute"; }
+
+  static bool Run(FDMat* mat, const std::vector<float>& alpha,
+                  const std::vector<float>& beta, bool swap_rb = false,
+                  ProcLib lib = ProcLib::DEFAULT);
+
+  std::vector<float> GetAlpha() const { return alpha_; }
+
+  void SetAlpha(const std::vector<float>& alpha) {
+    alpha_.clear();
+    std::vector<float>().swap(alpha_);
+    alpha_.assign(alpha.begin(), alpha.end());
+  }
+
+  std::vector<float> GetBeta() const { return beta_; }
+
+  void SetBeta(const std::vector<float>& beta) {
+    beta_.clear();
+    std::vector<float>().swap(beta_);
+    beta_.assign(beta.begin(), beta.end());
+  }
+
+  bool GetSwapRB() {
+    return swap_rb_;
+  }
+
+  void SetSwapRB(const bool& swap_rb) {
+    swap_rb_ = swap_rb;
+  }
+
+ private:
+  std::vector<float> alpha_;
+  std::vector<float> beta_;
+  bool swap_rb_;
+};
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/transform.h b/fastdeploy/vision/common/processors/transform.h
old mode 100644
new mode 100755
index 7ba58167d46..e3f71463657
--- a/fastdeploy/vision/common/processors/transform.h
+++ b/fastdeploy/vision/common/processors/transform.h
@@ -18,6 +18,7 @@
 #include "fastdeploy/vision/common/processors/center_crop.h"
 #include "fastdeploy/vision/common/processors/color_space_convert.h"
 #include "fastdeploy/vision/common/processors/convert.h"
+#include "fastdeploy/vision/common/processors/convert_and_permute.h"
 #include "fastdeploy/vision/common/processors/crop.h"
 #include
"fastdeploy/vision/common/processors/hwc2chw.h" #include "fastdeploy/vision/common/processors/limit_by_stride.h" diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index de6a8de6762..6f36078a3f0 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -24,29 +24,36 @@ YOLOv5Postprocessor::YOLOv5Postprocessor() { nms_threshold_ = 0.5; multi_label_ = true; max_wh_ = 7680.0; - initialized_ = true; } -bool YOLOv5Postprocessor::Postprocess( - const std::vector& infer_results, std::vector* results, - const std::map>& im_info) { - for (size_t bs = 0; bs < results->size(); ++bs) { +bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector* results, + const std::map>& im_info) { + if (!initialized_) { + FDERROR << "Postprocessor is not initialized." << std::endl; + return false; + } + + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { (*results)[bs].Clear(); if (multi_label_) { - (*results)[bs].Reserve(infer_results[0].shape[1] * (infer_results[0].shape[2] - 5)); + (*results)[bs].Reserve(tensors[0].shape[1] * (tensors[0].shape[2] - 5)); } else { - (*results)[bs].Reserve(infer_results[0].shape[1]); + (*results)[bs].Reserve(tensors[0].shape[1]); } - if (infer_results[0].dtype != FDDataType::FP32) { + if (tensors[0].dtype != FDDataType::FP32) { FDERROR << "Only support post process with float32 data." << std::endl; return false; } - const float* data = reinterpret_cast(infer_results[0].Data()) + bs * infer_results[0].shape[1] * infer_results[0].shape[2]; - for (size_t i = 0; i < infer_results[0].shape[1]; ++i) { - int s = i * infer_results[0].shape[2]; + const float* data = reinterpret_cast(tensors[0].Data()) + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; float confidence = data[s + 4]; if (multi_label_) { - for (size_t j = 5; j < infer_results[0].shape[2]; ++j) { + for (size_t j = 5; j < tensors[0].shape[2]; ++j) { confidence = data[s + 4]; const float* class_score = data + s + j; confidence *= (*class_score); @@ -67,7 +74,7 @@ bool YOLOv5Postprocessor::Postprocess( } } else { const float* max_class_score = - std::max_element(data + s + 5, data + s + infer_results[0].shape[2]); + std::max_element(data + s + 5, data + s + tensors[0].shape[2]); confidence *= (*max_class_score); // filter boxes by conf_threshold if (confidence <= conf_threshold_) { @@ -123,24 +130,6 @@ bool YOLOv5Postprocessor::Postprocess( return true; } -bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector* results, - const std::map>& im_info) { - if (!initialized_) { - FDERROR << "Postprocessor is not initialized." << std::endl; - return false; - } - - int batch = tensors[0].shape[0]; - - results->resize(batch); - - if (!Postprocess(tensors, results, im_info)) { - FDERROR << "Failed to preprocess input image." 
<< std::endl; - return false; - } - return true; -} - } // namespace detection } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index 81d82c9207c..110a22feed7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -40,7 +40,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { const std::map>& im_info); /// Set conf_threshold, default 0.25 - void SetConfThreshold(float conf_threshold) { + void SetConfThreshold(const float& conf_threshold) { conf_threshold_ = conf_threshold; } @@ -48,7 +48,7 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { float GetConfThreshold() const { return conf_threshold_; } /// Set nms_threshold, default 0.5 - void SetNMSThreshold(float nms_threshold) { + void SetNMSThreshold(const float& nms_threshold) { nms_threshold_ = nms_threshold; } @@ -56,19 +56,14 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { float GetNMSThreshold() const { return nms_threshold_; } /// Set multi_label, default true - void SetMultiLabel(bool multi_label) { + void SetMultiLabel(const bool& multi_label) { multi_label_ = multi_label; } /// Get multi_label, default true bool GetMultiLabel() const { return multi_label_; } - private: - bool Postprocess(const std::vector& tensors, - std::vector* results, - const std::map>& im_info); - - bool initialized_ = false; + protected: float conf_threshold_; float nms_threshold_; bool multi_label_; diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 031b4e152e7..aa4ee9ae6c9 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -27,7 +27,6 @@ YOLOv5Preprocessor::YOLOv5Preprocessor() { is_scale_up_ = false; stride_ = 32; max_wh_ = 7680.0; - initialized_ = true; } void YOLOv5Preprocessor::LetterBox(FDMat* mat) { @@ -83,21 +82,16 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, } // yolov5's preprocess steps // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW + // 2. 
convert_and_permute(swap_rb=true) LetterBox(mat); - BGR2RGB::Run(mat); - // Compute `result = mat * alpha + beta` directly by channel std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); + ConvertAndPermute::Run(mat, alpha, beta, true); // Record output shape of preprocessed image (*im_info)["output_shape"] = {static_cast(mat->Height()), static_cast(mat->Width())}; - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); mat->ShareWithTensor(output); output->ExpandDim(0); // reshape to n, h, w, c return true; diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index 634531681a1..bb2eaaa8bb6 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -39,28 +39,25 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { std::map>* im_info); /// Set target size, tuple of (width, height), default size = {640, 640} - void SetSize(std::vector size) { size_ = size; } + void SetSize(const std::vector& size) { size_ = size; } /// Get target size, tuple of (width, height), default size = {640, 640} std::vector GetSize() const { return size_; } /// Set padding value, size should be the same as channels - void SetPaddingValue(std::vector padding_value) { + void SetPaddingValue(const std::vector& padding_value) { padding_value_ = padding_value; } /// Get padding value, size should be the same as channels std::vector GetPaddingValue() const { return padding_value_; } - private: + protected: bool Preprocess(FDMat* mat, FDTensor* output, std::map>* im_info); - bool IsDynamicInput() const { return is_dynamic_input_; } - void LetterBox(FDMat* mat); - bool initialized_ = false; // target size, tuple of (width, height), default size = {640, 640} std::vector size_; @@ -83,15 +80,6 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { // for offseting the boxes by classes when using NMS float max_wh_; - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // YOLOv5 official 'export_onnx.py' script will export dynamic ONNX by - // default. - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; }; } // namespace detection From d83fd12830671eb4380419ed7b5c6bef601a6dcf Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sun, 13 Nov 2022 05:48:50 +0000 Subject: [PATCH 30/50] fixed bugs --- fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc | 5 ----- fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc | 4 ---- 2 files changed, 9 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 6f36078a3f0..ef6fc48c52d 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -28,11 +28,6 @@ YOLOv5Postprocessor::YOLOv5Postprocessor() { bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector* results, const std::map>& im_info) { - if (!initialized_) { - FDERROR << "Postprocessor is not initialized." 
<< std::endl; - return false; - } - int batch = tensors[0].shape[0]; results->resize(batch); diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index aa4ee9ae6c9..d25fca76daf 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -99,10 +99,6 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* outputs, std::map>* im_info) { - if (!initialized_) { - FDERROR << "The preprocessor is not initialized." << std::endl; - return false; - } if (images->size() == 0) { FDERROR << "The size of input images should be greater than 0." << std::endl; return false; From 302ca01014cb5b12a3fc9eba3e6598091f605135 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sun, 13 Nov 2022 08:30:57 +0000 Subject: [PATCH 31/50] fixed im_info for bs_predict --- .../detection/contrib/yolov5/postprocessor.cc | 8 ++++---- .../detection/contrib/yolov5/postprocessor.h | 6 +++--- .../detection/contrib/yolov5/preprocessor.cc | 4 ++-- .../vision/detection/contrib/yolov5/preprocessor.h | 4 ++-- .../vision/detection/contrib/yolov5/yolov5.cc | 5 +++-- .../detection/contrib/yolov5/yolov5_pybind.cc | 14 +++++++------- .../fastdeploy/vision/detection/contrib/yolov5.py | 6 +++--- 7 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index ef6fc48c52d..dd61efb0023 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -27,7 +27,7 @@ YOLOv5Postprocessor::YOLOv5Postprocessor() { } bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector* results, - const std::map>& im_info) { + const std::vector>>& ims_info) { int batch = tensors[0].shape[0]; results->resize(batch); @@ -94,9 +94,9 @@ bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector< utils::NMS(&((*results)[bs]), nms_threshold_); // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), "Cannot find input_shape or output_shape from im_info."); float out_h = iter_out->second[0]; float out_w = iter_out->second[1]; diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index 110a22feed7..9623165f951 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -32,12 +32,12 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { * * \param[in] tensors The inference result from runtime * \param[in] result The output result of detection - * \param[in] im_info record input_shape and output_shape + * \param[in] ims_info The shape info list, record input_shape and output_shape * \return true if the postprocess successed, otherwise false */ bool Run(const std::vector& tensors, - std::vector* results, - const std::map>& im_info); + std::vector* results, + const std::vector>>& ims_info); /// Set conf_threshold, default 0.25 void 
SetConfThreshold(const float& conf_threshold) { diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index d25fca76daf..6517d7c72ad 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -98,7 +98,7 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, } bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* outputs, - std::map>* im_info) { + std::vector>>* ims_info) { if (images->size() == 0) { FDERROR << "The size of input images should be greater than 0." << std::endl; return false; @@ -107,7 +107,7 @@ bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* // Concat all the preprocessed data to a batch tensor std::vector tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { - if (!Preprocess(&(*images)[i], &tensors[i], im_info)) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index bb2eaaa8bb6..b3559685db7 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -32,11 +32,11 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { * * \param[in] images The input image data list, all the elements are returned by cv::imread() * \param[in] outputs The output tensors which will feed in runtime - * \param[in] im_info record input_shape and output_shape + * \param[in] ims_info The shape info list, record input_shape and output_shape * \return true if the preprocess successed, otherwise false */ bool Run(std::vector* images, std::vector* outputs, - std::map>* im_info); + std::vector>>* ims_info); /// Set target size, tuple of (width, height), default size = {640, 640} void SetSize(const std::vector& size) { size_ = size; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index b0df6b4e955..affad85f527 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -62,10 +62,11 @@ bool YOLOv5::Predict(const cv::Mat& im, DetectionResult* result) { } bool YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { - std::map> im_info; + std::vector>> ims_info; + ims_info.resize(images.size()); std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &im_info)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { FDERROR << "Failed to preprocess the input image." 
<< std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index d9758e07ebc..4e2f69957e4 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -25,11 +25,11 @@ void BindYOLOv5(pybind11::module& m) { images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); } std::vector outputs; - std::map> im_info; - if (!self.Run(&images, &outputs, &im_info)) { + std::vector>> ims_info; + if (!self.Run(&images, &outputs, &ims_info)) { pybind11::eval("raise Exception('Failed to preprocess the input data in PaddleClasPreprocessor.')"); } - return make_pair(outputs, im_info); + return make_pair(outputs, ims_info); }) .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, &vision::detection::YOLOv5Preprocessor::SetSize) .def_property("padding_value", &vision::detection::YOLOv5Preprocessor::GetPaddingValue, &vision::detection::YOLOv5Preprocessor::SetPaddingValue); @@ -38,19 +38,19 @@ void BindYOLOv5(pybind11::module& m) { m, "YOLOv5Postprocessor") .def(pybind11::init<>()) .def("run", [](vision::detection::YOLOv5Postprocessor& self, std::vector& inputs, - const std::map>& im_info) { + const std::vector>>& ims_info) { std::vector results; - if (!self.Run(inputs, &results, im_info)) { + if (!self.Run(inputs, &results, ims_info)) { pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')"); } return results; }) .def("run", [](vision::detection::YOLOv5Postprocessor& self, std::vector& input_array, - const std::map>& im_info) { + const std::vector>>& ims_info) { std::vector results; std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); - if (!self.Run(inputs, &results, im_info)) { + if (!self.Run(inputs, &results, ims_info)) { pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv5Postprocessor.')"); } return results; diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py index a7fb904c952..60ee46eec34 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov5.py +++ b/python/fastdeploy/vision/detection/contrib/yolov5.py @@ -67,14 +67,14 @@ def __init__(self): """ self._postprocessor = C.vision.detection.YOLOv5Postprocessor() - def run(self, runtime_results, im_info): + def run(self, runtime_results, ims_info): """Postprocess the runtime results for YOLOv5 :param: runtime_results: (list of FDTensor)The output FDTensor results from runtime - :param: im_info: (dict)Record input_shape and output_shape + :param: ims_info: (list of dict)Record input_shape and output_shape :return: list of DetectionResult(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) """ - return self._postprocessor.run(runtime_results, im_info) + return self._postprocessor.run(runtime_results, ims_info) @property def conf_threshold(self): From 4b9cc780795ad027a576f6f5417a266bd460262c Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sun, 13 Nov 2022 08:34:10 +0000 Subject: [PATCH 32/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/yolov5.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index affad85f527..40e343891b6 100755 --- 
a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -77,7 +77,7 @@ bool YOLOv5::BatchPredict(const std::vector& images, std::vector Date: Sun, 13 Nov 2022 08:42:50 +0000 Subject: [PATCH 33/50] add bs_predict for yolov5 --- tests/models/test_yolov5.py | 118 ++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100755 tests/models/test_yolov5.py diff --git a/tests/models/test_yolov5.py b/tests/models/test_yolov5.py new file mode 100755 index 00000000000..5a32fb2289b --- /dev/null +++ b/tests/models/test_yolov5.py @@ -0,0 +1,118 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fastdeploy as fd +import cv2 +import os +import pickle +import numpy as np +import runtime_config as rc + + +def test_detection_yolov5(): + model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx" + input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" + input_url2 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000570688.jpg" + result_url1 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_result1.pkl" + result_url2 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_result2.pkl" + fd.download(model_url, "resources") + fd.download(input_url1, "resources") + fd.download(input_url2, "resources") + fd.download(result_url1, "resources") + fd.download(result_url2, "resources") + + model_file = "resources/yolov5s.onnx" + model = fd.vision.detection.YOLOv5( + model_file, runtime_option=rc.test_option) + + with open("resources/yolov5_result1.pkl", "rb") as f: + expect1 = pickle.load(f) + + with open("resources/yolov5_result2.pkl", "rb") as f: + expect2 = pickle.load(f) + + # compare diff + im1 = cv2.imread("./resources/000000014439.jpg") + im2 = cv2.imread("./resources/000000570688.jpg") + + for i in range(3): + # test single predict + result1 = model.predict(im1) + result2 = model.predict(im2) + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_boxes_2 = np.fabs( + np.array(result2.boxes) - np.array(expect2["boxes"])) + + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_label_2 = np.fabs( + np.array(result2.label_ids) - np.array(expect2["label_ids"])) + + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + diff_scores_2 = np.fabs( + np.array(result2.scores) - np.array(expect2["scores"])) + + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + assert diff_boxes_2.max( + ) < 1e-06, "There's difference in detection boxes 2." + assert diff_label_2.max( + ) < 1e-06, "There's difference in detection label 2." 
+ assert diff_scores_2.max( + ) < 1e-05, "There's difference in detection score 2." + + # test batch predict + results = model.batch_predict([im1, im2]) + result1 = results[0] + result2 = results[1] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_boxes_2 = np.fabs( + np.array(result2.boxes) - np.array(expect2["boxes"])) + + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_label_2 = np.fabs( + np.array(result2.label_ids) - np.array(expect2["label_ids"])) + + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + diff_scores_2 = np.fabs( + np.array(result2.scores) - np.array(expect2["scores"])) + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + assert diff_boxes_2.max( + ) < 1e-06, "There's difference in detection boxes 2." + assert diff_label_2.max( + ) < 1e-06, "There's difference in detection label 2." + assert diff_scores_2.max( + ) < 1e-05, "There's difference in detection score 2." + + +if __name__ == "__main__": + test_detection_yolov5() From 0f3b4a75f0dd6aa5d7565e6d5a42673f9577744b Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 07:21:36 +0000 Subject: [PATCH 34/50] Add runtime test and batch eval --- .../detection/contrib/yolov5/preprocessor.cc | 1 + .../vision/detection/contrib/yolov5/yolov5.cc | 1 - .../fastdeploy/vision/evaluation/detection.py | 51 ++++++++++++++----- tests/models/test_yolov5.py | 26 ++++++++++ 4 files changed, 65 insertions(+), 14 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 7d2ffc0b884..796104c3699 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -103,6 +103,7 @@ bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* FDERROR << "The size of input images should be greater than 0." 
<< std::endl; return false; } + ims_info->resize(images.size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor std::vector tensors(images->size()); diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc index 40e343891b6..422cf501c42 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5.cc @@ -63,7 +63,6 @@ bool YOLOv5::Predict(const cv::Mat& im, DetectionResult* result) { bool YOLOv5::BatchPredict(const std::vector& images, std::vector* results) { std::vector>> ims_info; - ims_info.resize(images.size()); std::vector fd_images = WrapMat(images); if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) { diff --git a/python/fastdeploy/vision/evaluation/detection.py b/python/fastdeploy/vision/evaluation/detection.py index 98c6794feab..d670729ef2a 100644 --- a/python/fastdeploy/vision/evaluation/detection.py +++ b/python/fastdeploy/vision/evaluation/detection.py @@ -23,7 +23,8 @@ def eval_detection(model, ann_file, conf_threshold=None, nms_iou_threshold=None, - plot=False): + plot=False, + batch_size=1): from .utils import CocoDetection from .utils import COCOMetric import cv2 @@ -61,19 +62,43 @@ def eval_detection(model, start_time = time.time() im = cv2.imread(image_info["image"]) im_id = image_info["im_id"] - if conf_threshold is None and nms_iou_threshold is None: - result = model.predict(im.copy()) + if batch_size == 1: + if conf_threshold is None and nms_iou_threshold is None: + result = model.predict(im.copy()) + else: + result = model.predict(im, conf_threshold, nms_iou_threshold) + pred = { + 'bbox': [[c] + [s] + b + for b, s, c in zip(result.boxes, result.scores, + result.label_ids)], + 'bbox_num': len(result.boxes), + 'im_id': im_id + } + eval_metric.update(im_id, pred) else: - result = model.predict(im, conf_threshold, nms_iou_threshold) - pred = { - 'bbox': - [[c] + [s] + b - for b, s, c in zip(result.boxes, result.scores, result.label_ids) - ], - 'bbox_num': len(result.boxes), - 'im_id': im_id - } - eval_metric.update(im_id, pred) + im_list = list() + im_id_list = list() + im_list.append(im) + im_id_list.append(im_id) + if (i + 1) % batch_size != 0: + continue + if conf_threshold is None and nms_iou_threshold is None: + results = model.batch_predict(im_list) + else: + model.postprocessor.conf_threshold = conf_threshold + model.postprocessor.nms_threshold = nms_iou_threshold + results = model.batch_predict(im_list, conf_threshold, + nms_iou_threshold) + for b in range(batch_size): + pred = { + 'bbox': [[c] + [s] + b + for b, s, c in zip(results[b].boxes, results[ + b].scores, results[b].label_ids)], + 'bbox_num': len(results[b].boxes), + 'im_id': im_id_list[b] + } + eval_metric.update(im_id_list[b], pred) + if i == image_num - 1: end_time = time.time() average_inference_time = round( diff --git a/tests/models/test_yolov5.py b/tests/models/test_yolov5.py index 5a32fb2289b..7cc9f082299 100755 --- a/tests/models/test_yolov5.py +++ b/tests/models/test_yolov5.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from fastdeploy import ModelFormat import fastdeploy as fd import cv2 import os @@ -36,6 +37,12 @@ def test_detection_yolov5(): model = fd.vision.detection.YOLOv5( model_file, runtime_option=rc.test_option) + preprocessor = fd.vision.detection.YOLOv5Preprocessor() + postprocessor = fd.vision.detection.YOLOv5Postprocessor() + + rc.test_option.set_model_path(model_file, model_format=ModelFormat.ONNX) + runtime = fd.Runtime(rc.test_option) + with open("resources/yolov5_result1.pkl", "rb") as f: expect1 = pickle.load(f) @@ -47,6 +54,25 @@ def test_detection_yolov5(): im2 = cv2.imread("./resources/000000570688.jpg") for i in range(3): + # test runtime + input_tensors, ims_info = preprocessor.run([im1]) + output_tensors = runtime.infer({"images": input_tensors[0]}) + results = postprocessor.run(output_tensors, ims_info) + result1 = results[0] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." # test single predict result1 = model.predict(im1) result2 = model.predict(im2) From 386b9081fd0a82ca53b4c3a73228808ee252e6ac Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 07:26:59 +0000 Subject: [PATCH 35/50] deal with comments --- fastdeploy/vision/common/processors/convert_and_permute.cc | 4 ++-- fastdeploy/vision/common/processors/convert_and_permute.h | 2 +- fastdeploy/vision/detection/contrib/yolov5/postprocessor.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fastdeploy/vision/common/processors/convert_and_permute.cc b/fastdeploy/vision/common/processors/convert_and_permute.cc index 042e9a13962..73cbb5b48f8 100644 --- a/fastdeploy/vision/common/processors/convert_and_permute.cc +++ b/fastdeploy/vision/common/processors/convert_and_permute.cc @@ -21,9 +21,9 @@ ConvertAndPermute::ConvertAndPermute(const std::vector& alpha, const std::vector& beta, bool swap_rb) { FDASSERT(alpha.size() == beta.size(), - "Convert: requires the size of alpha equal to the size of beta."); + "ConvertAndPermute: requires the size of alpha equal to the size of beta."); FDASSERT(alpha.size() > 0 && beta.size() > 0, - "Convert: requires the size of alpha and beta > 0."); + "ConvertAndPermute: requires the size of alpha and beta > 0."); alpha_.assign(alpha.begin(), alpha.end()); beta_.assign(beta.begin(), beta.end()); swap_rb_ = swap_rb; diff --git a/fastdeploy/vision/common/processors/convert_and_permute.h b/fastdeploy/vision/common/processors/convert_and_permute.h index d4fc5da1213..a36dfd56383 100644 --- a/fastdeploy/vision/common/processors/convert_and_permute.h +++ b/fastdeploy/vision/common/processors/convert_and_permute.h @@ -53,7 +53,7 @@ class FASTDEPLOY_DECL ConvertAndPermute : public Processor { return swap_rb_; } - void SetSwapRB(const bool& swap_rb) { + void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index 9623165f951..a1479dd9403 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -56,7 +56,7 @@ 
class FASTDEPLOY_DECL YOLOv5Postprocessor { float GetNMSThreshold() const { return nms_threshold_; } /// Set multi_label, default true - void SetMultiLabel(const bool& multi_label) { + void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } From 810d7cd78157a5e8676c7f8ce89bfe5adf8bf262 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 07:32:58 +0000 Subject: [PATCH 36/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 796104c3699..112a4d4d5da 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -103,7 +103,7 @@ bool YOLOv5Preprocessor::Run(std::vector* images, std::vector* FDERROR << "The size of input images should be greater than 0." << std::endl; return false; } - ims_info->resize(images.size()); + ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor std::vector tensors(images->size()); From b2002324bd8d681e1b792ab2f6899e2df2e9dd98 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 08:45:41 +0000 Subject: [PATCH 37/50] update testcase --- tests/models/test_yolov5.py | 71 ++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/tests/models/test_yolov5.py b/tests/models/test_yolov5.py index 7cc9f082299..3d79ed33d61 100755 --- a/tests/models/test_yolov5.py +++ b/tests/models/test_yolov5.py @@ -37,12 +37,6 @@ def test_detection_yolov5(): model = fd.vision.detection.YOLOv5( model_file, runtime_option=rc.test_option) - preprocessor = fd.vision.detection.YOLOv5Preprocessor() - postprocessor = fd.vision.detection.YOLOv5Postprocessor() - - rc.test_option.set_model_path(model_file, model_format=ModelFormat.ONNX) - runtime = fd.Runtime(rc.test_option) - with open("resources/yolov5_result1.pkl", "rb") as f: expect1 = pickle.load(f) @@ -54,25 +48,6 @@ def test_detection_yolov5(): im2 = cv2.imread("./resources/000000570688.jpg") for i in range(3): - # test runtime - input_tensors, ims_info = preprocessor.run([im1]) - output_tensors = runtime.infer({"images": input_tensors[0]}) - results = postprocessor.run(output_tensors, ims_info) - result1 = results[0] - - diff_boxes_1 = np.fabs( - np.array(result1.boxes) - np.array(expect1["boxes"])) - diff_label_1 = np.fabs( - np.array(result1.label_ids) - np.array(expect1["label_ids"])) - diff_scores_1 = np.fabs( - np.array(result1.scores) - np.array(expect1["scores"])) - - assert diff_boxes_1.max( - ) < 1e-06, "There's difference in detection boxes 1." - assert diff_label_1.max( - ) < 1e-06, "There's difference in detection label 1." - assert diff_scores_1.max( - ) < 1e-05, "There's difference in detection score 1." # test single predict result1 = model.predict(im1) result2 = model.predict(im2) @@ -140,5 +115,51 @@ def test_detection_yolov5(): ) < 1e-05, "There's difference in detection score 2." 
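Before the runtime-level test below, a usage note for the batched evaluation path that this series adds to `eval_detection` (a hedged sketch, not part of the test suite): images are accumulated into lists of `batch_size` and routed through `model.batch_predict`, and any remainder at the end of the dataset is evaluated as a smaller final batch. The dataset paths are placeholders, and the image-directory argument is assumed from context rather than visible in the hunks above:

```python
# Hedged sketch of batched COCO evaluation; paths are placeholders and the
# image-directory argument ahead of ann_file is an assumption.
import fastdeploy as fd

model = fd.vision.detection.YOLOv5("yolov5s.onnx")
metric = fd.vision.evaluation.eval_detection(
    model,
    "val2017/",                            # image directory (assumed argument)
    "annotations/instances_val2017.json",  # ann_file
    conf_threshold=0.001,
    nms_iou_threshold=0.65,
    batch_size=8)  # batch_size=1 keeps the original single-image path
print(metric)
```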
+def test_detection_yolov5_runtime(): + model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx" + input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" + result_url1 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_result1.pkl" + fd.download(model_url, "resources") + fd.download(input_url1, "resources") + fd.download(result_url1, "resources") + + model_file = "resources/yolov5s.onnx" + + preprocessor = fd.vision.detection.YOLOv5Preprocessor() + postprocessor = fd.vision.detection.YOLOv5Postprocessor() + + rc.test_option.set_model_path(model_file, model_format=ModelFormat.ONNX) + rc.test_option.use_openvino_backend() + runtime = fd.Runtime(rc.test_option) + + with open("resources/yolov5_result1.pkl", "rb") as f: + expect1 = pickle.load(f) + + # compare diff + im1 = cv2.imread("./resources/000000014439.jpg") + + for i in range(3): + # test runtime + input_tensors, ims_info = preprocessor.run([im1.copy()]) + output_tensors = runtime.infer({"images": input_tensors[0]}) + results = postprocessor.run(output_tensors, ims_info) + result1 = results[0] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + + assert diff_boxes_1.max( + ) < 1e-04, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + if __name__ == "__main__": test_detection_yolov5() + test_detection_yolov5_runtime() From 97085a2065db952941eaeb1bdaccc94ac82002f1 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 11:40:49 +0000 Subject: [PATCH 38/50] fixed batch eval bug --- .../vision/detection/contrib/yolov5.py | 6 ++--- .../fastdeploy/vision/evaluation/detection.py | 24 ++++++++++--------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py index 60ee46eec34..42eccb88d4a 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov5.py +++ b/python/fastdeploy/vision/detection/contrib/yolov5.py @@ -140,17 +140,17 @@ def __init__(self, # 通过self.initialized判断整个模型的初始化是否成功 assert self.initialized, "YOLOv5 initialize failed." 
- def predict(self, input_image, conf_threshold=0.25, nms_threshold=0.5): + def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): """Detect an input image :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format :param conf_threshold: confidence threshold for postprocessing, default is 0.25 - :param nms_threshold: iou threshold for NMS, default is 0.5 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 :return: DetectionResult """ self.postprocessor.conf_threshold = conf_threshold - self.postprocessor.nms_threshold = nms_threshold + self.postprocessor.nms_threshold = nms_iou_threshold return self._model.predict(input_image) def batch_predict(self, images): diff --git a/python/fastdeploy/vision/evaluation/detection.py b/python/fastdeploy/vision/evaluation/detection.py index d670729ef2a..a13e0429e02 100644 --- a/python/fastdeploy/vision/evaluation/detection.py +++ b/python/fastdeploy/vision/evaluation/detection.py @@ -55,6 +55,8 @@ def eval_detection(model, start_time = 0 end_time = 0 average_inference_time = 0 + im_list = list() + im_id_list = list() for image_info, i in zip(all_image_info, trange( image_num, desc="Inference Progress")): @@ -76,28 +78,28 @@ def eval_detection(model, } eval_metric.update(im_id, pred) else: - im_list = list() - im_id_list = list() im_list.append(im) im_id_list.append(im_id) - if (i + 1) % batch_size != 0: + # If the batch_size is not satisfied, the remaining pictures are formed into a batch + if (i + 1) % batch_size != 0 and i != image_num - 1: continue if conf_threshold is None and nms_iou_threshold is None: results = model.batch_predict(im_list) else: model.postprocessor.conf_threshold = conf_threshold model.postprocessor.nms_threshold = nms_iou_threshold - results = model.batch_predict(im_list, conf_threshold, - nms_iou_threshold) - for b in range(batch_size): + results = model.batch_predict(im_list) + for k in range(len(im_list)): pred = { 'bbox': [[c] + [s] + b - for b, s, c in zip(results[b].boxes, results[ - b].scores, results[b].label_ids)], - 'bbox_num': len(results[b].boxes), - 'im_id': im_id_list[b] + for b, s, c in zip(results[k].boxes, results[ + k].scores, results[k].label_ids)], + 'bbox_num': len(results[k].boxes), + 'im_id': im_id_list[k] } - eval_metric.update(im_id_list[b], pred) + eval_metric.update(im_id_list[k], pred) + im_list.clear() + im_id_list.clear() if i == image_num - 1: end_time = time.time() From ac9b1a7b32d0f57c73e808f9cfa0acc97eeb23a7 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Mon, 14 Nov 2022 12:55:39 +0000 Subject: [PATCH 39/50] fixed preprocess bug --- fastdeploy/core/fd_tensor.cc | 8 ++++++++ fastdeploy/core/fd_tensor.h | 8 +------- .../vision/detection/contrib/yolov5/yolov5_pybind.cc | 3 +++ 3 files changed, 12 insertions(+), 7 deletions(-) mode change 100644 => 100755 fastdeploy/core/fd_tensor.cc mode change 100644 => 100755 fastdeploy/core/fd_tensor.h diff --git a/fastdeploy/core/fd_tensor.cc b/fastdeploy/core/fd_tensor.cc old mode 100644 new mode 100755 index a3544756b37..86ce866f4e5 --- a/fastdeploy/core/fd_tensor.cc +++ b/fastdeploy/core/fd_tensor.cc @@ -43,6 +43,14 @@ const void* FDTensor::Data() const { return buffer_; } +void FDTensor::StopSharing() { + if (IsShared()) { + ReallocFn(Nbytes()); + CopyBuffer(buffer_, external_data_ptr, Nbytes()); + external_data_ptr = nullptr; + } +} + const void* FDTensor::CpuData() const { if (device == Device::GPU) { #ifdef WITH_GPU diff --git a/fastdeploy/core/fd_tensor.h 
b/fastdeploy/core/fd_tensor.h
old mode 100644
new mode 100755
index 32a0da86798..7deb4822988
--- a/fastdeploy/core/fd_tensor.h
+++ b/fastdeploy/core/fd_tensor.h
@@ -61,13 +61,7 @@ struct FASTDEPLOY_DECL FDTensor {
     return external_data_ptr != nullptr;
   }

-  void StopSharing() {
-    if (IsShared()) {
-      ReallocFn(Nbytes());
-      CopyBuffer(buffer_, external_data_ptr, Nbytes());
-      external_data_ptr = nullptr;
-    }
-  }
+  void StopSharing();

   const void* Data() const;

diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
index 4e2f69957e4..f44891d9846 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
@@ -29,6 +29,9 @@ void BindYOLOv5(pybind11::module& m) {
         if (!self.Run(&images, &outputs, &ims_info)) {
           pybind11::eval("raise Exception('Failed to preprocess the input data in YOLOv5Preprocessor.')");
         }
+        for (size_t i = 0; i < outputs.size(); ++i) {
+          outputs[i].StopSharing();
+        }
         return make_pair(outputs, ims_info);
       })
       .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, &vision::detection::YOLOv5Preprocessor::SetSize)

From aa06dc6bc576f230758c019872bf26990a7547d2 Mon Sep 17 00:00:00 2001
From: wjj19950828
Date: Wed, 16 Nov 2022 08:15:17 +0000
Subject: [PATCH 40/50] refactor yolov7

---
 fastdeploy/vision.h                           |   2 +-
 .../detection/contrib/yolov5/postprocessor.cc |   6 +-
 .../detection/contrib/yolov5/postprocessor.h  |   4 +-
 .../detection/contrib/yolov5/preprocessor.cc  |  26 +-
 .../detection/contrib/yolov5/preprocessor.h   |  11 +
 .../detection/contrib/yolov5/yolov5_pybind.cc |   3 +-
 fastdeploy/vision/detection/contrib/yolov7.cc | 344 ------------------
 fastdeploy/vision/detection/contrib/yolov7.h  | 113 ------
 .../detection/contrib/yolov7/postprocessor.cc | 103 ++++++
 .../detection/contrib/yolov7/postprocessor.h  |  66 ++++
 .../detection/contrib/yolov7/preprocessor.cc  | 131 +++++++
 .../detection/contrib/yolov7/preprocessor.h   |  98 +++++
 .../vision/detection/contrib/yolov7/yolov7.cc |  89 +++++
 .../vision/detection/contrib/yolov7/yolov7.h  |  88 +++++
 .../detection/contrib/yolov7/yolov7_pybind.cc |  87 +++++
 .../vision/detection/contrib/yolov7_pybind.cc |  42 ---
 .../fastdeploy/vision/detection/__init__.py   |   2 +-
 .../vision/detection/contrib/yolov5.py        |  19 +-
 .../vision/detection/contrib/yolov7.py        | 205 ++++++-----
 19 files changed, 839 insertions(+), 600 deletions(-)
 delete mode 100755 fastdeploy/vision/detection/contrib/yolov7.cc
 delete mode 100644 fastdeploy/vision/detection/contrib/yolov7.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/postprocessor.cc
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/postprocessor.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/preprocessor.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/yolov7.cc
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/yolov7.h
 create mode 100755 fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc
 delete mode 100644 fastdeploy/vision/detection/contrib/yolov7_pybind.cc

diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h
index 2f8c7066131..5dc9f4aa3da 100755
--- a/fastdeploy/vision.h
+++ b/fastdeploy/vision.h
@@ -24,7 +24,7 @@
 #include "fastdeploy/vision/detection/contrib/yolov5/yolov5.h"
 #include "fastdeploy/vision/detection/contrib/yolov5lite.h"
 #include "fastdeploy/vision/detection/contrib/yolov6.h"
-#include
"fastdeploy/vision/detection/contrib/yolov7.h" +#include "fastdeploy/vision/detection/contrib/yolov7/yolov7.h" #include "fastdeploy/vision/detection/contrib/yolov7end2end_ort.h" #include "fastdeploy/vision/detection/contrib/yolov7end2end_trt.h" #include "fastdeploy/vision/detection/contrib/yolox.h" diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index dd61efb0023..0366fcce006 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -22,7 +22,7 @@ namespace detection { YOLOv5Postprocessor::YOLOv5Postprocessor() { conf_threshold_ = 0.25; nms_threshold_ = 0.5; - multi_label_ = true; + multi_label_ = false; max_wh_ = 7680.0; } @@ -103,9 +103,9 @@ bool YOLOv5Postprocessor::Run(const std::vector& tensors, std::vector< float ipt_h = iter_ipt->second[0]; float ipt_w = iter_ipt->second[1]; float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { - float pad_h = (out_h - ipt_h * scale) / 2; - float pad_w = (out_w - ipt_w * scale) / 2; int32_t label_id = ((*results)[bs].label_ids)[i]; // clip box (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh_ * label_id; diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index a1479dd9403..c0ccf58de96 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -55,12 +55,12 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { /// Get nms_threshold, default 0.5 float GetNMSThreshold() const { return nms_threshold_; } - /// Set multi_label, default true + /// Set multi_label, set true for eval, default false void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } - /// Get multi_label, default true + /// Get multi_label, default false bool GetMultiLabel() const { return multi_label_; } protected: diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 112a4d4d5da..0933f7f108a 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -20,6 +20,7 @@ namespace vision { namespace detection { YOLOv5Preprocessor::YOLOv5Preprocessor() { + resize_after_load_ = false; size_ = {640, 640}; padding_value_ = {114.0, 114.0, 114.0}; is_mini_pad_ = false; @@ -50,7 +51,9 @@ void YOLOv5Preprocessor::LetterBox(FDMat* mat) { resize_h = size_[1]; resize_w = size_[0]; } - Resize::Run(mat, resize_w, resize_h); + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } if (pad_h > 0 || pad_w > 0) { float half_h = pad_h * 1.0 / 2; int top = int(round(half_h - 0.1)); @@ -67,18 +70,19 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast(mat->Height()), static_cast(mat->Width())}; - // process after image load - double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), - static_cast(mat->Width())); - if (std::fabs(ratio - 1.0f) > 1e-06) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; + if (resize_after_load_) { + double 
ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()),
+                                        static_cast(mat->Width()));
+    if (std::fabs(ratio - 1.0f) > 1e-06) {
+      int interp = cv::INTER_AREA;
+      if (ratio > 1.0) {
+        interp = cv::INTER_LINEAR;
+      }
+      int resize_h = int(mat->Height() * ratio);
+      int resize_w = int(mat->Width() * ratio);
+      Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
+    }
   }
   // yolov5's preprocess steps
   // 1. letterbox
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
index b3559685db7..41aa25466cb 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h
@@ -52,12 +52,23 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor {
   /// Get padding value, size should be the same as channels
   std::vector GetPaddingValue() const { return padding_value_; }

+  /// Set resize_after_load, may have an impact on mAP, default false
+  void SetResizeAfterLoad(bool resize_after_load) {
+    resize_after_load_ = resize_after_load;
+  }
+
+  /// Get resize_after_load, default false
+  bool GetResizeAfterLoad() const { return resize_after_load_; }
+
  protected:
   bool Preprocess(FDMat* mat, FDTensor* output,
                   std::map>* im_info);

   void LetterBox(FDMat* mat);

+  // whether resize after image load, may have an impact on mAP, default false
+  bool resize_after_load_;
+
   // target size, tuple of (width, height), default size = {640, 640}
   std::vector size_;

diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
index f44891d9846..03e223e82fc 100755
--- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc
@@ -35,7 +35,8 @@ void BindYOLOv5(pybind11::module& m) {
         return make_pair(outputs, ims_info);
       })
       .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, &vision::detection::YOLOv5Preprocessor::SetSize)
-      .def_property("padding_value", &vision::detection::YOLOv5Preprocessor::GetPaddingValue, &vision::detection::YOLOv5Preprocessor::SetPaddingValue);
+      .def_property("padding_value", &vision::detection::YOLOv5Preprocessor::GetPaddingValue, &vision::detection::YOLOv5Preprocessor::SetPaddingValue)
+      .def_property("resize_after_load", &vision::detection::YOLOv5Preprocessor::GetResizeAfterLoad, &vision::detection::YOLOv5Preprocessor::SetResizeAfterLoad);

   pybind11::class_(
       m, "YOLOv5Postprocessor")
diff --git a/fastdeploy/vision/detection/contrib/yolov7.cc b/fastdeploy/vision/detection/contrib/yolov7.cc
deleted file mode 100755
index 9185e16ed0e..00000000000
--- a/fastdeploy/vision/detection/contrib/yolov7.cc
+++ /dev/null
@@ -1,344 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
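The `resize_after_load` switch added above is deliberately off by default because the extra early resize can change accuracy slightly. A minimal Python sketch of how a caller might opt in, assuming the `YOLOv5` wrapper exposes its preprocessor as `model.preprocessor` (the same pattern the YOLOv7 wrapper uses later in this patch); the model and image paths are placeholders:

```python
import cv2
import fastdeploy as fd

# Hypothetical local files; substitute your own ONNX export and test image.
model = fd.vision.detection.YOLOv5("yolov5s.onnx")
model.preprocessor.resize_after_load = True  # optional early resize, may affect mAP
im = cv2.imread("000000014439.jpg")
print(model.predict(im))
```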
- -#include "fastdeploy/vision/detection/contrib/yolov7.h" - -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" -#ifdef ENABLE_CUDA_PREPROCESS -#include "fastdeploy/vision/utils/cuda_utils.h" -#endif // ENABLE_CUDA_PREPROCESS - -namespace fastdeploy { -namespace vision { -namespace detection { - -void YOLOv7::LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill, bool scale_up, int stride) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -YOLOv7::YOLOv7(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, - const ModelFormat& model_format) { - if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; -#ifdef ENABLE_CUDA_PREPROCESS - cudaSetDevice(runtime_option.device_id); - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - cuda_stream_ = reinterpret_cast(stream); - runtime_option.SetExternalStream(cuda_stream_); -#endif // ENABLE_CUDA_PREPROCESS - initialized = Initialize(); -} - -bool YOLOv7::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - max_wh = 7680.0; - reused_input_tensors_.resize(1); - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. 
- is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -YOLOv7::~YOLOv7() { -#ifdef ENABLE_CUDA_PREPROCESS - if (use_cuda_preprocessing_) { - CUDA_CHECK(cudaFreeHost(input_img_cuda_buffer_host_)); - CUDA_CHECK(cudaFree(input_img_cuda_buffer_device_)); - CUDA_CHECK(cudaFree(input_tensor_cuda_buffer_device_)); - CUDA_CHECK(cudaStreamDestroy(reinterpret_cast(cuda_stream_))); - } -#endif // ENABLE_CUDA_PREPROCESS -} - -bool YOLOv7::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (std::fabs(ratio - 1.0f) > 1e-06) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov7's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - YOLOv7::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -void YOLOv7::UseCudaPreprocessing(int max_image_size) { -#ifdef ENABLE_CUDA_PREPROCESS - use_cuda_preprocessing_ = true; - is_scale_up = true; - if (input_img_cuda_buffer_host_ == nullptr) { - // prepare input data cache in GPU pinned memory - CUDA_CHECK(cudaMallocHost((void**)&input_img_cuda_buffer_host_, - max_image_size * 3)); - // prepare input data cache in GPU device memory - CUDA_CHECK( - cudaMalloc((void**)&input_img_cuda_buffer_device_, max_image_size * 3)); - CUDA_CHECK(cudaMalloc((void**)&input_tensor_cuda_buffer_device_, - 3 * size[0] * size[1] * sizeof(float))); - } -#else - FDWARNING << "The FastDeploy didn't compile with BUILD_CUDA_SRC=ON." - << std::endl; - use_cuda_preprocessing_ = false; -#endif -} - -bool YOLOv7::CudaPreprocess( - Mat* mat, FDTensor* output, - std::map>* im_info) { -#ifdef ENABLE_CUDA_PREPROCESS - if (is_mini_pad != false || is_no_pad != false || is_scale_up != true) { - FDERROR << "Preprocessing with CUDA is only available when the arguments " - "satisfy (is_mini_pad=false, is_no_pad=false, is_scale_up=true)." 
- << std::endl; - return false; - } - - // Record the shape of image and the shape of preprocessed image - (*im_info)["input_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - cudaStream_t stream = reinterpret_cast(cuda_stream_); - int src_img_buf_size = mat->Height() * mat->Width() * mat->Channels(); - memcpy(input_img_cuda_buffer_host_, mat->Data(), src_img_buf_size); - CUDA_CHECK(cudaMemcpyAsync(input_img_cuda_buffer_device_, - input_img_cuda_buffer_host_, src_img_buf_size, - cudaMemcpyHostToDevice, stream)); - utils::CudaYoloPreprocess(input_img_cuda_buffer_device_, mat->Width(), - mat->Height(), input_tensor_cuda_buffer_device_, - size[0], size[1], padding_value, stream); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(size[0]), - static_cast(size[1])}; - - output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, - input_tensor_cuda_buffer_device_); - output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -#else - FDERROR << "CUDA src code was not enabled." << std::endl; - return false; -#endif // ENABLE_CUDA_PREPROCESS -} - -bool YOLOv7::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.0f; - float pad_w = (out_w - ipt_w * scale) / 2.0f; - if (is_mini_pad) { - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh 
* label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOv7::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { - Mat mat(*im); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (use_cuda_preprocessing_) { - if (!CudaPreprocess(&mat, &reused_input_tensors_[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - } else { - if (!Preprocess(&mat, &reused_input_tensors_[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - } - - reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; - if (!Infer()) { - FDERROR << "Failed to inference." << std::endl; - return false; - } - - if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov7.h b/fastdeploy/vision/detection/contrib/yolov7.h deleted file mode 100644 index b9d637ed9ab..00000000000 --- a/fastdeploy/vision/detection/contrib/yolov7.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { -/*! @brief YOLOv7 model object used when to load a YOLOv7 model exported by YOLOv7. - */ -class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel { - public: - /** \brief Set path of model file and the configuration of runtime. 
- * - * \param[in] model_file Path of model file, e.g ./yolov7.onnx - * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored - * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends" - * \param[in] model_format Model format of the loaded model, default is ONNX format - */ - YOLOv7(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::ONNX); - - ~YOLOv7(); - - virtual std::string ModelName() const { return "yolov7"; } - /** \brief Predict the detection result for an input image - * - * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format - * \param[in] result The output detection result will be writen to this structure - * \param[in] conf_threshold confidence threashold for postprocessing, default is 0.25 - * \param[in] nms_iou_threshold iou threashold for NMS, default is 0.5 - * \return true if the prediction successed, otherwise false - */ - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - - void UseCudaPreprocessing(int max_img_size = 3840 * 2160); - - /*! @brief - Argument for image preprocessing step, tuple of (width, height), decide the target size after resize, default size = {640, 640} - */ - std::vector size; - // padding value, size should be the same as channels - - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, - // will resize the image to the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, - // the maximum resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS - float max_wh; - - private: - bool Initialize(); - - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - bool CudaPreprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - void LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. 
- bool is_dynamic_input_; - // CUDA host buffer for input image - uint8_t* input_img_cuda_buffer_host_ = nullptr; - // CUDA device buffer for input image - uint8_t* input_img_cuda_buffer_device_ = nullptr; - // CUDA device buffer for TRT input tensor - float* input_tensor_cuda_buffer_device_ = nullptr; - // Whether to use CUDA preprocessing - bool use_cuda_preprocessing_ = false; - // CUDA stream - void* cuda_stream_ = nullptr; -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov7/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/postprocessor.cc new file mode 100755 index 00000000000..01d657adb30 --- /dev/null +++ b/fastdeploy/vision/detection/contrib/yolov7/postprocessor.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/detection/contrib/yolov7/postprocessor.h" +#include "fastdeploy/vision/utils/utils.h" + +namespace fastdeploy { +namespace vision { +namespace detection { + +YOLOv7Postprocessor::YOLOv7Postprocessor() { + conf_threshold_ = 0.25; + nms_threshold_ = 0.5; + max_wh_ = 7680.0; +} + +bool YOLOv7Postprocessor::Run(const std::vector& tensors, std::vector* results, + const std::vector>>& ims_info) { + int batch = tensors[0].shape[0]; + + results->resize(batch); + + for (size_t bs = 0; bs < batch; ++bs) { + (*results)[bs].Clear(); + (*results)[bs].Reserve(tensors[0].shape[1]); + if (tensors[0].dtype != FDDataType::FP32) { + FDERROR << "Only support post process with float32 data." 
<< std::endl; + return false; + } + const float* data = reinterpret_cast(tensors[0].Data()) + bs * tensors[0].shape[1] * tensors[0].shape[2]; + for (size_t i = 0; i < tensors[0].shape[1]; ++i) { + int s = i * tensors[0].shape[2]; + float confidence = data[s + 4]; + const float* max_class_score = + std::max_element(data + s + 5, data + s + tensors[0].shape[2]); + confidence *= (*max_class_score); + // filter boxes by conf_threshold + if (confidence <= conf_threshold_) { + continue; + } + int32_t label_id = std::distance(data + s + 5, max_class_score); + // convert from [x, y, w, h] to [x1, y1, x2, y2] + (*results)[bs].boxes.emplace_back(std::array{ + data[s] - data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh_, + data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh_, + data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh_}); + (*results)[bs].label_ids.push_back(label_id); + (*results)[bs].scores.push_back(confidence); + } + + if ((*results)[bs].boxes.size() == 0) { + return true; + } + + utils::NMS(&((*results)[bs]), nms_threshold_); + + // scale the boxes to the origin image shape + auto iter_out = ims_info[bs].find("output_shape"); + auto iter_ipt = ims_info[bs].find("input_shape"); + FDASSERT(iter_out != ims_info[bs].end() && iter_ipt != ims_info[bs].end(), + "Cannot find input_shape or output_shape from im_info."); + float out_h = iter_out->second[0]; + float out_w = iter_out->second[1]; + float ipt_h = iter_ipt->second[0]; + float ipt_w = iter_ipt->second[1]; + float scale = std::min(out_h / ipt_h, out_w / ipt_w); + float pad_h = (out_h - ipt_h * scale) / 2; + float pad_w = (out_w - ipt_w * scale) / 2; + for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) { + int32_t label_id = ((*results)[bs].label_ids)[i]; + // clip box + (*results)[bs].boxes[i][0] = (*results)[bs].boxes[i][0] - max_wh_ * label_id; + (*results)[bs].boxes[i][1] = (*results)[bs].boxes[i][1] - max_wh_ * label_id; + (*results)[bs].boxes[i][2] = (*results)[bs].boxes[i][2] - max_wh_ * label_id; + (*results)[bs].boxes[i][3] = (*results)[bs].boxes[i][3] - max_wh_ * label_id; + (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][2] = std::max(((*results)[bs].boxes[i][2] - pad_w) / scale, 0.0f); + (*results)[bs].boxes[i][3] = std::max(((*results)[bs].boxes[i][3] - pad_h) / scale, 0.0f); + (*results)[bs].boxes[i][0] = std::min((*results)[bs].boxes[i][0], ipt_w - 1.0f); + (*results)[bs].boxes[i][1] = std::min((*results)[bs].boxes[i][1], ipt_h - 1.0f); + (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f); + (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f); + } + } + return true; +} + +} // namespace detection +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov7/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov7/postprocessor.h new file mode 100755 index 00000000000..5ece87eb8b7 --- /dev/null +++ b/fastdeploy/vision/detection/contrib/yolov7/postprocessor.h @@ -0,0 +1,66 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace detection {
+/*! @brief Postprocessor object for YOLOv7 series models.
+ */
+class FASTDEPLOY_DECL YOLOv7Postprocessor {
+ public:
+  /** \brief Create a postprocessor instance for YOLOv7 series models
+   */
+  YOLOv7Postprocessor();
+
+  /** \brief Process the result of runtime and fill to DetectionResult structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] results The output detection result list
+   * \param[in] ims_info The shape info list, record input_shape and output_shape
+   * \return true if the postprocess succeeded, otherwise false
+   */
+  bool Run(const std::vector& tensors,
+           std::vector* results,
+           const std::vector>>& ims_info);
+
+  /// Set conf_threshold, default 0.25
+  void SetConfThreshold(const float& conf_threshold) {
+    conf_threshold_ = conf_threshold;
+  }
+
+  /// Get conf_threshold, default 0.25
+  float GetConfThreshold() const { return conf_threshold_; }
+
+  /// Set nms_threshold, default 0.5
+  void SetNMSThreshold(const float& nms_threshold) {
+    nms_threshold_ = nms_threshold;
+  }
+
+  /// Get nms_threshold, default 0.5
+  float GetNMSThreshold() const { return nms_threshold_; }
+
+ protected:
+  float conf_threshold_;
+  float nms_threshold_;
+  float max_wh_;
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
new file mode 100755
index 00000000000..26d831b0a38
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
@@ -0,0 +1,131 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
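Because the thresholds now live on the postprocessor rather than in each `Predict` call, they can be set once and reused across predictions. A hedged Python sketch of that pattern, using the `conf_threshold` and `nms_threshold` properties bound later in this patch:

```python
import fastdeploy as fd

# Standalone postprocessor; the defaults mirror the C++ side (0.25 / 0.5).
postprocessor = fd.vision.detection.YOLOv7Postprocessor()
postprocessor.conf_threshold = 0.3   # drop low-confidence candidates earlier
postprocessor.nms_threshold = 0.45   # stricter IoU suppression
# results = postprocessor.run(output_tensors, ims_info)  # fed by a runtime
```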
+ +#include "fastdeploy/vision/detection/contrib/yolov7/preprocessor.h" +#include "fastdeploy/function/concat.h" + +namespace fastdeploy { +namespace vision { +namespace detection { + +YOLOv7Preprocessor::YOLOv7Preprocessor() { + resize_after_load_ = false; + size_ = {640, 640}; + padding_value_ = {114.0, 114.0, 114.0}; + is_mini_pad_ = false; + is_no_pad_ = false; + is_scale_up_ = false; + stride_ = 32; + max_wh_ = 7680.0; +} + +void YOLOv7Preprocessor::LetterBox(FDMat* mat) { + float scale = + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + if (!is_scale_up_) { + scale = std::min(scale, 1.0f); + } + + int resize_h = int(round(mat->Height() * scale)); + int resize_w = int(round(mat->Width() * scale)); + + int pad_w = size_[0] - resize_w; + int pad_h = size_[1] - resize_h; + if (is_mini_pad_) { + pad_h = pad_h % stride_; + pad_w = pad_w % stride_; + } else if (is_no_pad_) { + pad_h = 0; + pad_w = 0; + resize_h = size_[1]; + resize_w = size_[0]; + } + if (resize_h != mat->Height() || resize_w != mat->Width()) { + Resize::Run(mat, resize_w, resize_h); + } + if (pad_h > 0 || pad_w > 0) { + float half_h = pad_h * 1.0 / 2; + int top = int(round(half_h - 0.1)); + int bottom = int(round(half_h + 0.1)); + float half_w = pad_w * 1.0 / 2; + int left = int(round(half_w - 0.1)); + int right = int(round(half_w + 0.1)); + Pad::Run(mat, top, bottom, left, right, padding_value_); + } +} + +bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output, + std::map>* im_info) { + // Record the shape of image and the shape of preprocessed image + (*im_info)["input_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + // process after image load + if (resize_after_load_) { + double ratio = (size_[0] * 1.0) / std::max(static_cast(mat->Height()), + static_cast(mat->Width())); + if (std::fabs(ratio - 1.0f) > 1e-06) { + int interp = cv::INTER_AREA; + if (ratio > 1.0) { + interp = cv::INTER_LINEAR; + } + int resize_h = int(mat->Height() * ratio); + int resize_w = int(mat->Width() * ratio); + Resize::Run(mat, resize_w, resize_h, -1, -1, interp); + } + } + // yolov7's preprocess steps + // 1. letterbox + // 2. convert_and_permute(swap_rb=true) + LetterBox(mat); + std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + std::vector beta = {0.0f, 0.0f, 0.0f}; + ConvertAndPermute::Run(mat, alpha, beta, true); + + // Record output shape of preprocessed image + (*im_info)["output_shape"] = {static_cast(mat->Height()), + static_cast(mat->Width())}; + + mat->ShareWithTensor(output); + output->ExpandDim(0); // reshape to n, h, w, c + return true; +} + +bool YOLOv7Preprocessor::Run(std::vector* images, std::vector* outputs, + std::vector>>* ims_info) { + if (images->size() == 0) { + FDERROR << "The size of input images should be greater than 0." << std::endl; + return false; + } + ims_info->resize(images->size()); + outputs->resize(1); + // Concat all the preprocessed data to a batch tensor + std::vector tensors(images->size()); + for (size_t i = 0; i < images->size(); ++i) { + if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { + FDERROR << "Failed to preprocess input image." 
        << std::endl;
+      return false;
+    }
+  }
+
+  if (tensors.size() == 1) {
+    (*outputs)[0] = std::move(tensors[0]);
+  } else {
+    function::Concat(tensors, &((*outputs)[0]), 0);
+  }
+  return true;
+}
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h
new file mode 100755
index 00000000000..ddcb786e599
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace detection {
+/*! @brief Preprocessor object for YOLOv7 series models.
+ */
+class FASTDEPLOY_DECL YOLOv7Preprocessor {
+ public:
+  /** \brief Create a preprocessor instance for YOLOv7 series models
+   */
+  YOLOv7Preprocessor();
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] images The input image data list, all the elements are returned by cv::imread()
+   * \param[in] outputs The output tensors which will feed in runtime
+   * \param[in] ims_info The shape info list, record input_shape and output_shape
+   * \return true if the preprocess succeeded, otherwise false
+   */
+  bool Run(std::vector* images, std::vector* outputs,
+           std::vector>>* ims_info);
+
+  /// Set target size, tuple of (width, height), default size = {640, 640}
+  void SetSize(const std::vector& size) { size_ = size; }
+
+  /// Get target size, tuple of (width, height), default size = {640, 640}
+  std::vector GetSize() const { return size_; }
+
+  /// Set padding value, size should be the same as channels
+  void SetPaddingValue(const std::vector& padding_value) {
+    padding_value_ = padding_value;
+  }
+
+  /// Get padding value, size should be the same as channels
+  std::vector GetPaddingValue() const { return padding_value_; }
+
+  /// Set resize_after_load, may have an impact on mAP, default false
+  void SetResizeAfterLoad(bool resize_after_load) {
+    resize_after_load_ = resize_after_load;
+  }
+
+  /// Get resize_after_load, default false
+  bool GetResizeAfterLoad() const { return resize_after_load_; }
+
+ protected:
+  bool Preprocess(FDMat* mat, FDTensor* output,
+                  std::map>* im_info);
+
+  void LetterBox(FDMat* mat);
+
+  // whether resize after image load, may have an impact on mAP, default false
+  bool resize_after_load_;
+
+  // target size, tuple of (width, height), default size = {640, 640}
+  std::vector size_;
+
+  // padding value, size should be the same as channels
+  std::vector padding_value_;
+
+  // only pad to the minimum rectangle whose height and width are multiples of stride
+  bool is_mini_pad_;
+
+  // while is_mini_pad = false and is_no_pad = true,
+  // will resize the image to the set size
+  bool is_no_pad_;
+
+  // if is_scale_up is false, the input image can only be zoomed out,
+  // the maximum resize scale cannot exceed 1.0
+  bool is_scale_up_;
+
+  // padding stride, for is_mini_pad
+  int stride_;
+
+  // for offsetting the boxes by classes when using NMS
+  float max_wh_;
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/yolov7.cc b/fastdeploy/vision/detection/contrib/yolov7/yolov7.cc
new file mode 100755
index 00000000000..513351a095e
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7.cc
@@ -0,0 +1,89 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/detection/contrib/yolov7/yolov7.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace detection {
+
+YOLOv7::YOLOv7(const std::string& model_file, const std::string& params_file,
+               const RuntimeOption& custom_option,
+               const ModelFormat& model_format) {
+  if (model_format == ModelFormat::ONNX) {
+    valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
+    valid_gpu_backends = {Backend::ORT, Backend::TRT};
+  } else {
+    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
+    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
+  }
+  runtime_option = custom_option;
+  runtime_option.model_format = model_format;
+  runtime_option.model_file = model_file;
+  runtime_option.params_file = params_file;
+  initialized = Initialize();
+}
+
+bool YOLOv7::Initialize() {
+  if (!InitRuntime()) {
+    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool YOLOv7::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, float nms_threshold) {
+  postprocessor_.SetConfThreshold(conf_threshold);
+  postprocessor_.SetNMSThreshold(nms_threshold);
+  if (!Predict(*im, result)) {
+    return false;
+  }
+  return true;
+}
+
+bool YOLOv7::Predict(const cv::Mat& im, DetectionResult* result) {
+  std::vector results;
+  if (!BatchPredict({im}, &results)) {
+    return false;
+  }
+  *result = std::move(results[0]);
+  return true;
+}
+
+bool YOLOv7::BatchPredict(const std::vector& images, std::vector* results) {
+  std::vector>> ims_info;
+  std::vector fd_images = WrapMat(images);
+
+  if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &ims_info)) {
+    FDERROR << "Failed to preprocess the input image." << std::endl;
+    return false;
+  }
+
+  reused_input_tensors_[0].name = InputInfoOfRuntime(0).name;
+  if (!Infer(reused_input_tensors_, &reused_output_tensors_)) {
+    FDERROR << "Failed to inference by runtime." << std::endl;
+    return false;
+  }
+
+  if (!postprocessor_.Run(reused_output_tensors_, results, ims_info)) {
+    FDERROR << "Failed to postprocess the inference results by runtime."
            << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/yolov7.h b/fastdeploy/vision/detection/contrib/yolov7/yolov7.h
new file mode 100755
index 00000000000..2c36fd0c809
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7.h
@@ -0,0 +1,88 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. //NOLINT
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/vision/detection/contrib/yolov7/preprocessor.h"
+#include "fastdeploy/vision/detection/contrib/yolov7/postprocessor.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace detection {
+/*! @brief YOLOv7 model object used to load a YOLOv7 model exported by YOLOv7.
+ */
+class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel {
+ public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g ./yolov7.onnx
+   * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
+  YOLOv7(const std::string& model_file, const std::string& params_file = "",
+         const RuntimeOption& custom_option = RuntimeOption(),
+         const ModelFormat& model_format = ModelFormat::ONNX);
+
+  std::string ModelName() const { return "yolov7"; }
+
+  /** \brief DEPRECATED Predict the detection result for an input image, remove at 1.0 version
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_threshold iou threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(cv::Mat* im, DetectionResult* result,
+                       float conf_threshold = 0.25,
+                       float nms_threshold = 0.5);
+
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool Predict(const cv::Mat& img, DetectionResult* result);
+
+  /** \brief Predict the detection results for a batch of input images
+   *
+   * \param[in] imgs The input image list, each element comes from cv::imread()
+   * \param[in] results The output detection result list
+   * \return true if the prediction succeeded, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector& imgs,
+                            std::vector* results);
+
+  /// Get preprocessor reference of YOLOv7
+  virtual YOLOv7Preprocessor& GetPreprocessor() {
+    return preprocessor_;
+  }
+
+  /// Get postprocessor reference of YOLOv7
+  virtual YOLOv7Postprocessor& GetPostprocessor() {
+    return postprocessor_;
+  }
+
+ protected:
+  bool Initialize();
+  YOLOv7Preprocessor preprocessor_;
+  YOLOv7Postprocessor postprocessor_;
+};
+
+} // namespace detection
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc b/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc
new file mode 100755
index 00000000000..9e3dad22ce7
--- /dev/null
+++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc
@@ -0,0 +1,87 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+void BindYOLOv7(pybind11::module& m) {
+  pybind11::class_(
+      m, "YOLOv7Preprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::detection::YOLOv7Preprocessor& self, std::vector& im_list) {
+        std::vector images;
+        for (size_t i = 0; i < im_list.size(); ++i) {
+          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+        }
+        std::vector outputs;
+        std::vector>> ims_info;
+        if (!self.Run(&images, &outputs, &ims_info)) {
+          pybind11::eval("raise Exception('Failed to preprocess the input data in YOLOv7Preprocessor.')");
+        }
+        for (size_t i = 0; i < outputs.size(); ++i) {
+          outputs[i].StopSharing();
+        }
+        return make_pair(outputs, ims_info);
+      })
+      .def_property("size", &vision::detection::YOLOv7Preprocessor::GetSize, &vision::detection::YOLOv7Preprocessor::SetSize)
+      .def_property("padding_value", &vision::detection::YOLOv7Preprocessor::GetPaddingValue, &vision::detection::YOLOv7Preprocessor::SetPaddingValue)
+      .def_property("resize_after_load", &vision::detection::YOLOv7Preprocessor::GetResizeAfterLoad, &vision::detection::YOLOv7Preprocessor::SetResizeAfterLoad);
+
+  pybind11::class_(
+      m, "YOLOv7Postprocessor")
+      .def(pybind11::init<>())
+      .def("run", [](vision::detection::YOLOv7Postprocessor& self, std::vector& inputs,
+                     const std::vector>>& ims_info) {
+        std::vector results;
+        if (!self.Run(inputs, &results, ims_info)) {
+          pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv7Postprocessor.')");
+        }
+        return results;
+      })
+      .def("run", [](vision::detection::YOLOv7Postprocessor& self, std::vector& input_array,
+                     const std::vector>>& ims_info) {
+        std::vector results;
+        std::vector inputs;
+        PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
+        if (!self.Run(inputs, &results, ims_info)) {
+          pybind11::eval("raise Exception('Failed to postprocess the runtime result in YOLOv7Postprocessor.')");
+        }
+        return results;
+      })
+      .def_property("conf_threshold", &vision::detection::YOLOv7Postprocessor::GetConfThreshold, &vision::detection::YOLOv7Postprocessor::SetConfThreshold)
+
.def_property("nms_threshold", &vision::detection::YOLOv7Postprocessor::GetNMSThreshold, &vision::detection::YOLOv7Postprocessor::SetNMSThreshold); + + pybind11::class_(m, "YOLOv7") + .def(pybind11::init()) + .def("predict", + [](vision::detection::YOLOv7& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::DetectionResult res; + self.Predict(mat, &res); + return res; + }) + .def("batch_predict", [](vision::detection::YOLOv7& self, std::vector& data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly("preprocessor", &vision::detection::YOLOv7::GetPreprocessor) + .def_property_readonly("postprocessor", &vision::detection::YOLOv7::GetPostprocessor); +} +} // namespace fastdeploy diff --git a/fastdeploy/vision/detection/contrib/yolov7_pybind.cc b/fastdeploy/vision/detection/contrib/yolov7_pybind.cc deleted file mode 100644 index d7ab993401d..00000000000 --- a/fastdeploy/vision/detection/contrib/yolov7_pybind.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv7(pybind11::module& m) { - pybind11::class_(m, "YOLOv7") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOv7& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def("use_cuda_preprocessing", - [](vision::detection::YOLOv7& self, int max_image_size) { - self.UseCudaPreprocessing(max_image_size); - }) - .def_readwrite("size", &vision::detection::YOLOv7::size) - .def_readwrite("padding_value", &vision::detection::YOLOv7::padding_value) - .def_readwrite("is_mini_pad", &vision::detection::YOLOv7::is_mini_pad) - .def_readwrite("is_no_pad", &vision::detection::YOLOv7::is_no_pad) - .def_readwrite("is_scale_up", &vision::detection::YOLOv7::is_scale_up) - .def_readwrite("stride", &vision::detection::YOLOv7::stride) - .def_readwrite("max_wh", &vision::detection::YOLOv7::max_wh); -} -} // namespace fastdeploy diff --git a/python/fastdeploy/vision/detection/__init__.py b/python/fastdeploy/vision/detection/__init__.py index 6de4a3fa634..b5f01f3a77e 100755 --- a/python/fastdeploy/vision/detection/__init__.py +++ b/python/fastdeploy/vision/detection/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. 
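With the preprocessor and postprocessor bound separately, the YOLOv7 pipeline can also be driven around a bare `fd.Runtime`, mirroring the YOLOv5 runtime test earlier in this series. A sketch under assumed inputs: the file name `yolov7.onnx`, the test image, and the input tensor name `images` all depend on the particular export:

```python
import cv2
import fastdeploy as fd
from fastdeploy import ModelFormat

# Build a bare runtime around a hypothetical YOLOv7 ONNX export.
option = fd.RuntimeOption()
option.set_model_path("yolov7.onnx", model_format=ModelFormat.ONNX)
runtime = fd.Runtime(option)

preprocessor = fd.vision.detection.YOLOv7Preprocessor()
postprocessor = fd.vision.detection.YOLOv7Postprocessor()

im = cv2.imread("test.jpg")
input_tensors, ims_info = preprocessor.run([im])
output_tensors = runtime.infer({"images": input_tensors[0]})  # input name is model-specific
results = postprocessor.run(output_tensors, ims_info)
print(results[0])
```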
from __future__ import absolute_import
-from .contrib.yolov7 import YOLOv7
+from .contrib.yolov7 import *
 from .contrib.yolor import YOLOR
 from .contrib.scaled_yolov4 import ScaledYOLOv4
 from .contrib.nanodet_plus import NanoDetPlus
diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py
index 42eccb88d4a..e8895083731 100644
--- a/python/fastdeploy/vision/detection/contrib/yolov5.py
+++ b/python/fastdeploy/vision/detection/contrib/yolov5.py
@@ -41,9 +41,19 @@ def size(self):

     @property
     def padding_value(self):
+        """
+        padding value for preprocessing, default [114.0, 114.0, 114.0]
+        """
         # padding value, size should be the same as channels
         return self._preprocessor.padding_value

+    @property
+    def resize_after_load(self):
+        """
+        resize_after_load for preprocessing, may have an impact on mAP, default false
+        """
+        return self._preprocessor.resize_after_load
+
     @size.setter
     def size(self, wh):
         assert isinstance(wh, (list, tuple)),\
@@ -60,6 +70,13 @@ def padding_value(self, value):
             list), "The value to set `padding_value` must be type of list."
         self._preprocessor.padding_value = value

+    @resize_after_load.setter
+    def resize_after_load(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `resize_after_load` must be type of bool."
+        self._preprocessor.resize_after_load = value
+

 class YOLOv5Postprocessor:
     def __init__(self):
@@ -93,7 +110,7 @@ def nms_threshold(self):
     @property
     def multi_label(self):
         """
-        multi_label for postprocessing, default is true
+        multi_label for postprocessing, set true for eval, default is false
         """
         return self._postprocessor.multi_label

diff --git a/python/fastdeploy/vision/detection/contrib/yolov7.py b/python/fastdeploy/vision/detection/contrib/yolov7.py
index 0334504851b..8b7ef44305a 100644
--- a/python/fastdeploy/vision/detection/contrib/yolov7.py
+++ b/python/fastdeploy/vision/detection/contrib/yolov7.py
@@ -18,6 +18,108 @@
 from .... import c_lib_wrap as C


+class YOLOv7Preprocessor:
+    def __init__(self):
+        """Create a preprocessor for YOLOv7
+        """
+        self._preprocessor = C.vision.detection.YOLOv7Preprocessor()
+
+    def run(self, input_ims):
+        """Preprocess input images for YOLOv7
+
+        :param: input_ims: (list of numpy.ndarray)The input images
+        :return: list of FDTensor
+        """
+        return self._preprocessor.run(input_ims)
+
+    @property
+    def size(self):
+        """
+        Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640]
+        """
+        return self._preprocessor.size
+
+    @property
+    def padding_value(self):
+        """
+        padding value for preprocessing, default [114.0, 114.0, 114.0]
+        """
+        # padding value, size should be the same as channels
+        return self._preprocessor.padding_value
+
+    @property
+    def resize_after_load(self):
+        """
+        resize_after_load for preprocessing, may have an impact on mAP, default false
+        """
+        return self._preprocessor.resize_after_load
+
+    @size.setter
+    def size(self, wh):
+        assert isinstance(wh, (list, tuple)),\
+            "The value to set `size` must be type of tuple or list."
+        assert len(wh) == 2,\
+            "The value to set `size` must contain 2 elements meaning [width, height], but now it contains {} elements.".format(
+            len(wh))
+        self._preprocessor.size = wh
+
+    @padding_value.setter
+    def padding_value(self, value):
+        assert isinstance(
+            value,
+            list), "The value to set `padding_value` must be type of list."
+ self._preprocessor.padding_value = value + + @resize_after_load.setter + def resize_after_load(self, value): + assert isinstance( + value, + bool), "The value to set `resize_after_load` must be type of bool." + self._preprocessor.resize_after_load = value + + +class YOLOv7Postprocessor: + def __init__(self): + """Create a postprocessor for YOLOv7 + """ + self._postprocessor = C.vision.detection.YOLOv7Postprocessor() + + def run(self, runtime_results, ims_info): + """Postprocess the runtime results for YOLOv7 + + :param: runtime_results: (list of FDTensor) The output FDTensor results from runtime + :param: ims_info: (list of dict) Record input_shape and output_shape + :return: list of DetectionResult (if the runtime_results are predicted from batched samples, the length of this list equals the batch size) + """ + return self._postprocessor.run(runtime_results, ims_info) + + @property + def conf_threshold(self): + """ + confidence threshold for postprocessing, default is 0.25 + """ + return self._postprocessor.conf_threshold + + @property + def nms_threshold(self): + """ + nms threshold for postprocessing, default is 0.5 + """ + return self._postprocessor.nms_threshold + + @conf_threshold.setter + def conf_threshold(self, conf_threshold): + assert isinstance(conf_threshold, float),\ + "The value to set `conf_threshold` must be type of float." + self._postprocessor.conf_threshold = conf_threshold + + @nms_threshold.setter + def nms_threshold(self, nms_threshold): + assert isinstance(nms_threshold, float),\ + "The value to set `nms_threshold` must be type of float." + self._postprocessor.nms_threshold = nms_threshold + + class YOLOv7(FastDeployModel): def __init__(self, model_file, @@ -35,6 +137,7 @@ def __init__(self, # The initialized runtime option is stored in self._runtime_option super(YOLOv7, self).__init__(runtime_option) + assert model_format == ModelFormat.ONNX, "YOLOv7 only supports model format of ModelFormat.ONNX now."
self._model = C.vision.detection.YOLOv7( model_file, params_file, self._runtime_option, model_format) # Check whether the whole model initialized successfully via self.initialized @@ -44,96 +147,36 @@ def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5): """Detect an input image :param input_image: (numpy.ndarray) The input image data, 3-D array with layout HWC, BGR format - :param conf_threshold: confidence threashold for postprocessing, default is 0.25 - :param nms_iou_threshold: iou threashold for NMS, default is 0.5 + :param conf_threshold: confidence threshold for postprocessing, default is 0.25 + :param nms_iou_threshold: iou threshold for NMS, default is 0.5 :return: DetectionResult """ - return self._model.predict(input_image, conf_threshold, - nms_iou_threshold) - # Property wrappers related to the YOLOv7 model - # Most are preprocessing related; e.g. setting model.size = [1280, 1280] changes the resize size used in preprocessing (provided the model supports it) - @property - def size(self): - """ - Argument for image preprocessing step, the preprocess image size, tuple of (width, height), default size = [640, 640] - """ - return self._model.size + self.postprocessor.conf_threshold = conf_threshold + self.postprocessor.nms_threshold = nms_iou_threshold + return self._model.predict(input_image) - @property - def padding_value(self): - # padding value, size should be the same as channels - return self._model.padding_value + def batch_predict(self, images): + """Detect a batch of input images - @property - def is_no_pad(self): - # when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size - return self._model.is_no_pad + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return list of DetectionResult + """ - @property - def is_mini_pad(self): - # only pad to the minimum rectangle whose height and width are multiples of stride - return self._model.is_mini_pad + return self._model.batch_predict(images) @property - def is_scale_up(self): - # if is_scale_up is false, the input image can only be zoomed out; the maximum resize scale cannot exceed 1.0 - return self._model.is_scale_up + def preprocessor(self): + """Get YOLOv7Preprocessor object of the loaded model - @property - def stride(self): - # padding stride, for is_mini_pad - return self._model.stride + :return YOLOv7Preprocessor + """ + return self._model.preprocessor @property - def max_wh(self): - # for offsetting the boxes by classes when using NMS - return self._model.max_wh + def postprocessor(self): + """Get YOLOv7Postprocessor object of the loaded model - @size.setter - def size(self, wh): - assert isinstance(wh, (list, tuple)),\ - "The value to set `size` must be type of tuple or list." - assert len(wh) == 2,\ - "The value to set `size` must contain 2 elements meaning [width, height], but now it contains {} elements.".format( - len(wh)) - self._model.size = wh - - @padding_value.setter - def padding_value(self, value): - assert isinstance( - value, - list), "The value to set `padding_value` must be type of list." - self._model.padding_value = value - - @is_no_pad.setter - def is_no_pad(self, value): - assert isinstance( - value, bool), "The value to set `is_no_pad` must be type of bool." - self._model.is_no_pad = value - - @is_mini_pad.setter - def is_mini_pad(self, value): - assert isinstance( - value, - bool), "The value to set `is_mini_pad` must be type of bool." - self._model.is_mini_pad = value - - @is_scale_up.setter - def is_scale_up(self, value): - assert isinstance( - value, - bool), "The value to set `is_scale_up` must be type of bool."
- self._model.is_scale_up = value - - @stride.setter - def stride(self, value): - assert isinstance( - value, int), "The value to set `stride` must be type of int." - self._model.stride = value - - @max_wh.setter - def max_wh(self, value): - assert isinstance( - value, float), "The value to set `max_wh` must be type of float." - self._model.max_wh = value + :return YOLOv7Postprocessor + """ + return self._model.postprocessor From 9c5f76631aca9af159476633bd9810dd8d09038d Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 16 Nov 2022 09:08:25 +0000 Subject: [PATCH 41/50] add yolov7 testcase --- tests/models/test_yolov7.py | 165 ++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100755 tests/models/test_yolov7.py diff --git a/tests/models/test_yolov7.py b/tests/models/test_yolov7.py new file mode 100755 index 00000000000..ba08fbaf5bc --- /dev/null +++ b/tests/models/test_yolov7.py @@ -0,0 +1,165 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from fastdeploy import ModelFormat +import fastdeploy as fd +import cv2 +import os +import pickle +import numpy as np +import runtime_config as rc + + +def test_detection_yolov7(): + model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx" + input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" + input_url2 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000570688.jpg" + result_url1 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_result1.pkl" + result_url2 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_result2.pkl" + fd.download(model_url, "resources") + fd.download(input_url1, "resources") + fd.download(input_url2, "resources") + fd.download(result_url1, "resources") + fd.download(result_url2, "resources") + + model_file = "resources/yolov7.onnx" + model = fd.vision.detection.YOLOv7( + model_file, runtime_option=rc.test_option) + + with open("resources/yolov7_result1.pkl", "rb") as f: + expect1 = pickle.load(f) + + with open("resources/yolov7_result2.pkl", "rb") as f: + expect2 = pickle.load(f) + + # compare diff + im1 = cv2.imread("./resources/000000014439.jpg") + im2 = cv2.imread("./resources/000000570688.jpg") + + for i in range(3): + # test single predict + result1 = model.predict(im1) + result2 = model.predict(im2) + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_boxes_2 = np.fabs( + np.array(result2.boxes) - np.array(expect2["boxes"])) + + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_label_2 = np.fabs( + np.array(result2.label_ids) - np.array(expect2["label_ids"])) + + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + diff_scores_2 = np.fabs( + np.array(result2.scores) - np.array(expect2["scores"])) + + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." 
+ assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + assert diff_boxes_2.max( + ) < 1e-06, "There's difference in detection boxes 2." + assert diff_label_2.max( + ) < 1e-06, "There's difference in detection label 2." + assert diff_scores_2.max( + ) < 1e-05, "There's difference in detection score 2." + + # test batch predict + results = model.batch_predict([im1, im2]) + result1 = results[0] + result2 = results[1] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_boxes_2 = np.fabs( + np.array(result2.boxes) - np.array(expect2["boxes"])) + + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_label_2 = np.fabs( + np.array(result2.label_ids) - np.array(expect2["label_ids"])) + + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + diff_scores_2 = np.fabs( + np.array(result2.scores) - np.array(expect2["scores"])) + assert diff_boxes_1.max( + ) < 1e-06, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." + + assert diff_boxes_2.max( + ) < 1e-06, "There's difference in detection boxes 2." + assert diff_label_2.max( + ) < 1e-06, "There's difference in detection label 2." + assert diff_scores_2.max( + ) < 1e-05, "There's difference in detection score 2." + + +def test_detection_yolov7_runtime(): + model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx" + input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" + result_url1 = "https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_result1.pkl" + fd.download(model_url, "resources") + fd.download(input_url1, "resources") + fd.download(result_url1, "resources") + + model_file = "resources/yolov7.onnx" + + preprocessor = fd.vision.detection.YOLOv7Preprocessor() + postprocessor = fd.vision.detection.YOLOv7Postprocessor() + + rc.test_option.set_model_path(model_file, model_format=ModelFormat.ONNX) + rc.test_option.use_openvino_backend() + runtime = fd.Runtime(rc.test_option) + + with open("resources/yolov7_result1.pkl", "rb") as f: + expect1 = pickle.load(f) + + # compare diff + im1 = cv2.imread("./resources/000000014439.jpg") + + for i in range(3): + # test runtime + input_tensors, ims_info = preprocessor.run([im1.copy()]) + output_tensors = runtime.infer({"images": input_tensors[0]}) + results = postprocessor.run(output_tensors, ims_info) + result1 = results[0] + + diff_boxes_1 = np.fabs( + np.array(result1.boxes) - np.array(expect1["boxes"])) + diff_label_1 = np.fabs( + np.array(result1.label_ids) - np.array(expect1["label_ids"])) + diff_scores_1 = np.fabs( + np.array(result1.scores) - np.array(expect1["scores"])) + + assert diff_boxes_1.max( + ) < 1e-04, "There's difference in detection boxes 1." + assert diff_label_1.max( + ) < 1e-06, "There's difference in detection label 1." + assert diff_scores_1.max( + ) < 1e-05, "There's difference in detection score 1." 
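
(Aside: the test above drives the decoupled preprocessor/runtime/postprocessor path; in everyday use the same knobs are reachable from the model object. A sketch with placeholder paths:)

```python
import cv2
import fastdeploy as fd

model = fd.vision.detection.YOLOv7("yolov7.onnx")  # placeholder path

# Thresholds now live on the postprocessor; predict(im, conf, nms)
# simply forwards into these properties.
model.postprocessor.conf_threshold = 0.3
model.postprocessor.nms_threshold = 0.45

# Preprocessing knobs are exposed the same way.
model.preprocessor.size = [640, 640]
model.preprocessor.padding_value = [114.0, 114.0, 114.0]

print(model.predict(cv2.imread("test.jpg")))
```
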
+ + if __name__ == "__main__": + test_detection_yolov7() + test_detection_yolov7_runtime() From e39d60a111f12a1a434e7d9a01688a1c5c50e475 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 17 Nov 2022 03:49:51 +0000 Subject: [PATCH 42/50] rm resize_after_load and add is_scale_up --- .../detection/contrib/yolov5/preprocessor.cc | 17 +---------------- .../detection/contrib/yolov5/preprocessor.h | 14 ++++++-------- .../detection/contrib/yolov5/yolov5_pybind.cc | 2 +- .../detection/contrib/yolov7/preprocessor.cc | 17 +---------------- .../detection/contrib/yolov7/preprocessor.h | 14 ++++++-------- .../detection/contrib/yolov7/yolov7_pybind.cc | 2 +- .../vision/detection/contrib/yolov5.py | 14 +++++++------- .../vision/detection/contrib/yolov7.py | 14 +++++++------- 8 files changed, 30 insertions(+), 64 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 0933f7f108a..6b1b6f82193 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -20,12 +20,11 @@ namespace vision { namespace detection { YOLOv5Preprocessor::YOLOv5Preprocessor() { - resize_after_load_ = false; size_ = {640, 640}; padding_value_ = {114.0, 114.0, 114.0}; is_mini_pad_ = false; is_no_pad_ = false; - is_scale_up_ = false; + is_scale_up_ = true; stride_ = 32; max_wh_ = 7680.0; } @@ -70,20 +69,6 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; - // process after image load - if (resize_after_load_) { - double ratio = (size_[0] * 1.0) / std::max(static_cast<double>(mat->Height()), - static_cast<double>(mat->Width())); - if (std::fabs(ratio - 1.0f) > 1e-06) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - } // yolov5's preprocess steps // 1. letterbox // 2. convert_and_permute(swap_rb=true) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h index 41aa25466cb..f0cf438df0c 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.h @@ -52,13 +52,14 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { /// Get padding value, size should be the same as channels std::vector<float> GetPaddingValue() const { return padding_value_; } - /// Set resize_after_load, may have an impact on map, default false - void SetResizeAfterLoad(bool resize_after_load) { - resize_after_load_ = resize_after_load; + /// Set is_scale_up, if is_scale_up is false, the input image can only + /// be zoomed out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { + is_scale_up_ = is_scale_up; } - /// Get resize_after_load, default false - bool GetResizeAfterLoad() const { return resize_after_load_; } + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } protected: bool Preprocess(FDMat* mat, FDTensor* output, @@ -66,9 +67,6 @@ class FASTDEPLOY_DECL YOLOv5Preprocessor { void LetterBox(FDMat* mat); - // whether resize after image load, may have an impact on map, default false - bool resize_after_load_; - // target size, tuple of (width, height), default size = {640, 640} std::vector<int> size_; diff --git a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc index 03e223e82fc..7b1574401fb 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/yolov5_pybind.cc @@ -36,7 +36,7 @@ void BindYOLOv5(pybind11::module& m) { }) .def_property("size", &vision::detection::YOLOv5Preprocessor::GetSize, &vision::detection::YOLOv5Preprocessor::SetSize) .def_property("padding_value", &vision::detection::YOLOv5Preprocessor::GetPaddingValue, &vision::detection::YOLOv5Preprocessor::SetPaddingValue) - .def_property("resize_after_load", &vision::detection::YOLOv5Preprocessor::GetResizeAfterLoad, &vision::detection::YOLOv5Preprocessor::SetResizeAfterLoad); + .def_property("is_scale_up", &vision::detection::YOLOv5Preprocessor::GetScaleUp, &vision::detection::YOLOv5Preprocessor::SetScaleUp); pybind11::class_<vision::detection::YOLOv5Postprocessor>( m, "YOLOv5Postprocessor") diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc index 26d831b0a38..f669d145e4e 100755 --- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc @@ -20,12 +20,11 @@ namespace vision { namespace detection { YOLOv7Preprocessor::YOLOv7Preprocessor() { - resize_after_load_ = false; size_ = {640, 640}; padding_value_ = {114.0, 114.0, 114.0}; is_mini_pad_ = false; is_no_pad_ = false; - is_scale_up_ = false; + is_scale_up_ = true; stride_ = 32; max_wh_ = 7680.0; } @@ -70,20 +69,6 @@ bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output, // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; - // process after image load - if (resize_after_load_) { - double ratio = (size_[0] * 1.0) / std::max(static_cast<double>(mat->Height()), - static_cast<double>(mat->Width())); - if (std::fabs(ratio - 1.0f) > 1e-06) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - } // yolov7's preprocess steps // 1. letterbox // 2. convert_and_permute(swap_rb=true) diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h index ddcb786e599..ff6c6cad55e 100755 --- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.h @@ -52,13 +52,14 @@ class FASTDEPLOY_DECL YOLOv7Preprocessor { /// Get padding value, size should be the same as channels std::vector<float> GetPaddingValue() const { return padding_value_; } - /// Set resize_after_load, may have an impact on map, default false - void SetResizeAfterLoad(bool resize_after_load) { - resize_after_load_ = resize_after_load; + /// Set is_scale_up, if is_scale_up is false, the input image can only + /// be zoomed out, the maximum resize scale cannot exceed 1.0, default true + void SetScaleUp(bool is_scale_up) { + is_scale_up_ = is_scale_up; } - /// Get resize_after_load, default false - bool GetResizeAfterLoad() const { return resize_after_load_; } + /// Get is_scale_up, default true + bool GetScaleUp() const { return is_scale_up_; } protected: bool Preprocess(FDMat* mat, FDTensor* output, @@ -66,9 +67,6 @@ class FASTDEPLOY_DECL YOLOv7Preprocessor { void LetterBox(FDMat* mat); - // whether resize after image load, may have an impact on map, default false - bool resize_after_load_; - // target size, tuple of (width, height), default size = {640, 640} std::vector<int> size_; diff --git a/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc b/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc index 9e3dad22ce7..6899faa9167 100755 --- a/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc +++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7_pybind.cc @@ -36,7 +36,7 @@ void BindYOLOv7(pybind11::module& m) { }) .def_property("size", &vision::detection::YOLOv7Preprocessor::GetSize, &vision::detection::YOLOv7Preprocessor::SetSize) .def_property("padding_value", &vision::detection::YOLOv7Preprocessor::GetPaddingValue, &vision::detection::YOLOv7Preprocessor::SetPaddingValue) - .def_property("resize_after_load", &vision::detection::YOLOv7Preprocessor::GetResizeAfterLoad, &vision::detection::YOLOv7Preprocessor::SetResizeAfterLoad); + .def_property("is_scale_up", &vision::detection::YOLOv7Preprocessor::GetScaleUp, &vision::detection::YOLOv7Preprocessor::SetScaleUp); pybind11::class_<vision::detection::YOLOv7Postprocessor>( m, "YOLOv7Postprocessor") diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py index e8895083731..4cc68129ff5 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov5.py +++ b/python/fastdeploy/vision/detection/contrib/yolov5.py @@ -48,11 +48,11 @@ def padding_value(self): return self._preprocessor.padding_value @property - def resize_after_load(self): + def is_scale_up(self): """ - resize_after_load for preprocessing, may have an impact on map, default false + is_scale_up for preprocessing, the input image can only be zoomed out, the maximum resize scale cannot exceed 1.0, default true """ - return self._preprocessor.resize_after_load + return self._preprocessor.is_scale_up @size.setter def size(self, wh): @@ -70,12 +70,12 @@ def padding_value(self, value): list), "The value to set `padding_value` must be type of list."
self._preprocessor.padding_value = value - @resize_after_load.setter - def resize_after_load(self, value): + @is_scale_up.setter + def is_scale_up(self, value): assert isinstance( value, - bool), "The value to set `resize_after_load` must be type of bool." - self._preprocessor.resize_after_load = value + bool), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value class YOLOv5Postprocessor: diff --git a/python/fastdeploy/vision/detection/contrib/yolov7.py b/python/fastdeploy/vision/detection/contrib/yolov7.py index 8b7ef44305a..510b72ed653 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov7.py +++ b/python/fastdeploy/vision/detection/contrib/yolov7.py @@ -48,11 +48,11 @@ def padding_value(self): return self._preprocessor.padding_value @property - def resize_after_load(self): + def is_scale_up(self): """ - resize_after_load for preprocessing, may have an impact on map, default false + is_scale_up for preprocessing, the input image can only be zoomed out, the maximum resize scale cannot exceed 1.0, default true """ - return self._preprocessor.resize_after_load + return self._preprocessor.is_scale_up @size.setter def size(self, wh): @@ -70,12 +70,12 @@ def padding_value(self, value): list), "The value to set `padding_value` must be type of list." self._preprocessor.padding_value = value - @resize_after_load.setter - def resize_after_load(self, value): + @is_scale_up.setter + def is_scale_up(self, value): assert isinstance( value, - bool), "The value to set `resize_after_load` must be type of bool." - self._preprocessor.resize_after_load = value + bool), "The value to set `is_scale_up` must be type of bool." + self._preprocessor.is_scale_up = value class YOLOv7Postprocessor: From bb1b8fc66c48452ce3332bbd7503a3f5a75ac0f7 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 17 Nov 2022 09:07:25 +0000 Subject: [PATCH 43/50] fixed bug --- fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc | 2 +- fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc index 6b1b6f82193..846e2513163 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -50,7 +50,7 @@ void YOLOv5Preprocessor::LetterBox(FDMat* mat) { resize_h = size_[1]; resize_w = size_[0]; } - if (resize_h != mat->Height() || resize_w != mat->Width()) { + if (std::fabs(scale - 1.0f) > 1e-06) { Resize::Run(mat, resize_w, resize_h); } if (pad_h > 0 || pad_w > 0) { diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc index f669d145e4e..91e22f32b4b 100755 --- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc @@ -50,7 +50,7 @@ void YOLOv7Preprocessor::LetterBox(FDMat* mat) { resize_h = size_[1]; resize_w = size_[0]; } - if (resize_h != mat->Height() || resize_w != mat->Width()) { + if (std::fabs(scale - 1.0f) > 1e-06) { Resize::Run(mat, resize_w, resize_h); } if (pad_h > 0 || pad_w > 0) { From 5f7b9359d00fb7dff90cddb6e517f40b8a02f224 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Thu, 17 Nov 2022 09:40:51 +0000 Subject: [PATCH 44/50] set multi_label true --- fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc | 2 +-
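
(Aside: after patches 42-44, is_scale_up replaces resize_after_load and defaults to true, and YOLOv5's multi_label defaults to true. A sketch of toggling them — the model path is hypothetical, and the multi_label setter mirrors the property shown earlier:)

```python
import fastdeploy as fd

model = fd.vision.detection.YOLOv5("yolov5s.onnx")  # hypothetical path

# is_scale_up now defaults to True, so letterboxing may enlarge small
# inputs; set False to only ever shrink images (max scale 1.0).
model.preprocessor.is_scale_up = False

# multi_label now defaults to True (eval-style output); disable it to
# keep a single label per box.
model.postprocessor.multi_label = False
```
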
fastdeploy/vision/detection/contrib/yolov5/postprocessor.h | 4 ++-- python/fastdeploy/vision/detection/contrib/yolov5.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc index 0366fcce006..4fe01dfeb86 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.cc @@ -22,7 +22,7 @@ namespace detection { YOLOv5Postprocessor::YOLOv5Postprocessor() { conf_threshold_ = 0.25; nms_threshold_ = 0.5; - multi_label_ = false; + multi_label_ = true; max_wh_ = 7680.0; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h index c0ccf58de96..88f9400fa20 100755 --- a/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/yolov5/postprocessor.h @@ -55,12 +55,12 @@ class FASTDEPLOY_DECL YOLOv5Postprocessor { /// Get nms_threshold, default 0.5 float GetNMSThreshold() const { return nms_threshold_; } - /// Set multi_label, set true for eval, default false + /// Set multi_label, set true for eval, default true void SetMultiLabel(bool multi_label) { multi_label_ = multi_label; } - /// Get multi_label, default false + /// Get multi_label, default true bool GetMultiLabel() const { return multi_label_; } protected: diff --git a/python/fastdeploy/vision/detection/contrib/yolov5.py b/python/fastdeploy/vision/detection/contrib/yolov5.py index 4cc68129ff5..b8113f3b83d 100644 --- a/python/fastdeploy/vision/detection/contrib/yolov5.py +++ b/python/fastdeploy/vision/detection/contrib/yolov5.py @@ -110,7 +110,7 @@ def nms_threshold(self): @property def multi_label(self): """ - multi_label for postprocessing, set true for eval, default is false + multi_label for postprocessing, set true for eval, default is true """ return self._postprocessor.multi_label From 845f7a673e87a7fbbb3c97af41954a34e35688ea Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Tue, 22 Nov 2022 09:14:18 +0000 Subject: [PATCH 45/50] optimize rvm preprocess --- fastdeploy/vision/matting/contrib/rvm.cc | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc index 846db6bd60e..7144a9018f7 100755 --- a/fastdeploy/vision/matting/contrib/rvm.cc +++ b/fastdeploy/vision/matting/contrib/rvm.cc @@ -63,20 +63,16 @@ bool RobustVideoMatting::Preprocess( if (resize_h != mat->Height() || resize_w != mat->Width()) { Resize::Run(mat, resize_w, resize_h); } - BGR2RGB::Run(mat); - - // Normalize + // Convert_and_permute(swap_rb=true) std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; std::vector<float> beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); + ConvertAndPermute::Run(mat, alpha, beta, true); + // Record output shape of preprocessed image (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->ExpandDim(0); // reshape to n, c, h, w return true; } @@ -120,8 +116,6 @@ bool RobustVideoMatting::Postprocess( // for alpha float* alpha_ptr = static_cast<float*>(alpha.Data()); - // cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); - // Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy.
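
(Aside: in numpy terms, the fused ConvertAndPermute step above is equivalent to the BGR2RGB + Convert + HWC2CHW + Cast chain it replaces. An illustration only — this is not the FastDeploy API:)

```python
import numpy as np

def convert_and_permute(bgr_hwc, alpha, beta, swap_rb=True):
    """Rough numpy equivalent of ConvertAndPermute::Run on a uint8 HWC image."""
    x = bgr_hwc.astype(np.float32)
    if swap_rb:
        x = x[:, :, ::-1]  # BGR -> RGB
    x = x * np.asarray(alpha, np.float32) + np.asarray(beta, np.float32)
    return np.ascontiguousarray(x.transpose(2, 0, 1))  # HWC -> CHW

chw = convert_and_permute(
    np.zeros((480, 640, 3), np.uint8), alpha=[1 / 255.0] * 3, beta=[0.0] * 3)
print(chw.shape)  # (3, 480, 640)
```
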
Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, alpha_ptr); // ref-only, zero copy. if ((out_h != in_h) || (out_w != in_w)) { @@ -130,8 +124,6 @@ bool RobustVideoMatting::Postprocess( // for foreground float* fgr_ptr = static_cast<float*>(fgr.Data()); - // cv::Mat fgr_zero_copy_ref(out_h, out_w, CV_32FC1, fgr_ptr); - // Mat fgr_resized(fgr_zero_copy_ref); // ref-only, zero copy. Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, fgr_ptr); // ref-only, zero copy. if ((out_h != in_h) || (out_w != in_w)) { From 8489624210828c8e71a97cb4679c30d7f5ab6726 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 23 Nov 2022 09:53:01 +0000 Subject: [PATCH 46/50] optimize rvm postprocess --- .../common/processors/convert_and_permute.cc | 2 +- .../common/processors/normalize_and_permute.cc | 2 +- fastdeploy/vision/common/result.cc | 8 ++++++-- fastdeploy/vision/matting/contrib/rvm.cc | 5 +++-- fastdeploy/vision/matting/contrib/rvm.h | 3 +++ fastdeploy/vision/matting/contrib/rvm_pybind.cc | 3 ++- python/fastdeploy/vision/matting/contrib/rvm.py | 16 ++++++++++++++++ tests/models/test_rvm.py | 1 + 8 files changed, 33 insertions(+), 7 deletions(-) mode change 100644 => 100755 fastdeploy/vision/common/processors/normalize_and_permute.cc mode change 100644 => 100755 tests/models/test_rvm.py diff --git a/fastdeploy/vision/common/processors/convert_and_permute.cc b/fastdeploy/vision/common/processors/convert_and_permute.cc index 73cbb5b48f8..e37bf88cfdd 100644 --- a/fastdeploy/vision/common/processors/convert_and_permute.cc +++ b/fastdeploy/vision/common/processors/convert_and_permute.cc @@ -43,7 +43,7 @@ bool ConvertAndPermute::ImplByOpenCV(FDMat* mat) { for (int i = 0; i < im->channels(); ++i) { cv::extractChannel(split_im[i], cv::Mat(origin_h, origin_w, CV_32FC1, - res.ptr() + i * origin_h * origin_w * 4), + res.ptr() + i * origin_h * origin_w * FDDataTypeSize(mat->Type())), 0); } diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.cc b/fastdeploy/vision/common/processors/normalize_and_permute.cc old mode 100644 new mode 100755 index 93850b97fbf..9484c98d679 --- a/fastdeploy/vision/common/processors/normalize_and_permute.cc +++ b/fastdeploy/vision/common/processors/normalize_and_permute.cc @@ -70,7 +70,7 @@ bool NormalizeAndPermute::ImplByOpenCV(Mat* mat) { for (int i = 0; i < im->channels(); ++i) { cv::extractChannel(split_im[i], cv::Mat(origin_h, origin_w, CV_32FC1, - res.ptr() + i * origin_h * origin_w * 4), + res.ptr() + i * origin_h * origin_w * FDDataTypeSize(mat->Type())), 0); } mat->SetMat(res); diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index ee137604891..f3b24c09eab 100755 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -395,12 +395,16 @@ void MattingResult::Reserve(int size) { } void MattingResult::Resize(int size) { - alpha.resize(size); + if (alpha.capacity() < size) { + alpha.resize(size); + } if (contain_foreground) { FDASSERT((shape.size() == 3), "Please initial shape (h,w,c) before call Resize."); int c = static_cast<int>(shape[2]); - foreground.resize(size * c); + if (foreground.capacity() < size * c) { + foreground.resize(size * c); + } } } diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc index 7144a9018f7..ea37402b044 100755 --- a/fastdeploy/vision/matting/contrib/rvm.cc +++ b/fastdeploy/vision/matting/contrib/rvm.cc @@ -47,6 +47,8 @@ bool RobustVideoMatting::Initialize() { video_mode = true; + swap_rb = true; + if
(!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; @@ -66,7 +68,7 @@ bool RobustVideoMatting::Preprocess( // Convert_and_permute(swap_rb=true) std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; std::vector<float> beta = {0.0f, 0.0f, 0.0f}; - ConvertAndPermute::Run(mat, alpha, beta, true); + ConvertAndPermute::Run(mat, alpha, beta, swap_rb); // Record output shape of preprocessed image (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; @@ -130,7 +132,6 @@ bool RobustVideoMatting::Postprocess( Resize::Run(&fgr_resized, in_w, in_h, -1, -1); } - result->Clear(); result->contain_foreground = true; // if contain_foreground == true, shape must be set to (h, w, c) result->shape = {static_cast<int64_t>(in_h), static_cast<int64_t>(in_w), 3}; diff --git a/fastdeploy/vision/matting/contrib/rvm.h b/fastdeploy/vision/matting/contrib/rvm.h index 58c64ac3b16..3f842401bcf 100755 --- a/fastdeploy/vision/matting/contrib/rvm.h +++ b/fastdeploy/vision/matting/contrib/rvm.h @@ -58,6 +58,9 @@ class FASTDEPLOY_DECL RobustVideoMatting : public FastDeployModel { /// Whether to open the video mode, if there are some irrelevant pictures, set it to false, the default is true // NOLINT bool video_mode; + /// Whether to convert to RGB. Set to false if you have already converted YUV format images to RGB outside the model, default true // NOLINT + bool swap_rb; + private: bool Initialize(); /// Preprocess an input image, and set the preprocessed results to `outputs` diff --git a/fastdeploy/vision/matting/contrib/rvm_pybind.cc b/fastdeploy/vision/matting/contrib/rvm_pybind.cc index a45816d65b5..25d95f51943 100755 --- a/fastdeploy/vision/matting/contrib/rvm_pybind.cc +++ b/fastdeploy/vision/matting/contrib/rvm_pybind.cc @@ -28,7 +28,8 @@ void BindRobustVideoMatting(pybind11::module& m) { return res; }) .def_readwrite("size", &vision::matting::RobustVideoMatting::size) - .def_readwrite("video_mode", &vision::matting::RobustVideoMatting::video_mode); + .def_readwrite("video_mode", &vision::matting::RobustVideoMatting::video_mode) + .def_readwrite("swap_rb", &vision::matting::RobustVideoMatting::swap_rb); } } // namespace fastdeploy diff --git a/python/fastdeploy/vision/matting/contrib/rvm.py b/python/fastdeploy/vision/matting/contrib/rvm.py index 144a3823cdc..174719eae27 100755 --- a/python/fastdeploy/vision/matting/contrib/rvm.py +++ b/python/fastdeploy/vision/matting/contrib/rvm.py @@ -59,6 +59,13 @@ def video_mode(self): """ return self._model.video_mode + @property + def swap_rb(self): + """ + Whether to convert to RGB. Set to false if you have already converted YUV format images to RGB outside the model, default true + """ + return self._model.swap_rb + @size.setter def size(self, wh): """ @@ -79,3 +86,12 @@ def video_mode(self, value): assert isinstance( value, bool), "The value to set `video_mode` must be type of bool." self._model.video_mode = value + + @swap_rb.setter + def swap_rb(self, value): + """ + Set swap_rb property, the default is true + """ + assert isinstance( + value, bool), "The value to set `swap_rb` must be type of bool."
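
(Aside — the setter body continues right after this note. A usage sketch of the two RobustVideoMatting flags added in this patch; the model path matches the test below but is otherwise a placeholder:)

```python
import fastdeploy as fd

model = fd.vision.matting.RobustVideoMatting(
    "resources/rvm/rvm_mobilenetv3_fp32.onnx")

# Feed unrelated single images rather than consecutive video frames.
model.video_mode = False

# Frames already converted to RGB (e.g. from YUV) outside the model?
# Skip the internal BGR->RGB swap.
model.swap_rb = False
```
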
+ self._model.swap_rb = value diff --git a/tests/models/test_rvm.py b/tests/models/test_rvm.py old mode 100644 new mode 100755 index 4fa3083e59a..c57b3f29d3e --- a/tests/models/test_rvm.py +++ b/tests/models/test_rvm.py @@ -27,6 +27,7 @@ def test_matting_rvm_cpu(): fd.download(input_url, "resources") model_path = "resources/rvm/rvm_mobilenetv3_fp32.onnx" # use ORT + rc.test_option.use_ort_backend() model = fd.vision.matting.RobustVideoMatting( model_path, runtime_option=rc.test_option) From f1d23c801471d9b5f10b3a17eeed5f125ccc7403 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Wed, 23 Nov 2022 11:20:31 +0000 Subject: [PATCH 47/50] fixed bug --- fastdeploy/vision/common/processors/convert_and_permute.cc | 2 +- fastdeploy/vision/common/processors/normalize_and_permute.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/vision/common/processors/convert_and_permute.cc b/fastdeploy/vision/common/processors/convert_and_permute.cc index e37bf88cfdd..73cbb5b48f8 100644 --- a/fastdeploy/vision/common/processors/convert_and_permute.cc +++ b/fastdeploy/vision/common/processors/convert_and_permute.cc @@ -43,7 +43,7 @@ bool ConvertAndPermute::ImplByOpenCV(FDMat* mat) { for (int i = 0; i < im->channels(); ++i) { cv::extractChannel(split_im[i], cv::Mat(origin_h, origin_w, CV_32FC1, - res.ptr() + i * origin_h * origin_w * FDDataTypeSize(mat->Type())), + res.ptr() + i * origin_h * origin_w * 4), 0); } diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.cc b/fastdeploy/vision/common/processors/normalize_and_permute.cc index 9484c98d679..93850b97fbf 100755 --- a/fastdeploy/vision/common/processors/normalize_and_permute.cc +++ b/fastdeploy/vision/common/processors/normalize_and_permute.cc @@ -70,7 +70,7 @@ bool NormalizeAndPermute::ImplByOpenCV(Mat* mat) { for (int i = 0; i < im->channels(); ++i) { cv::extractChannel(split_im[i], cv::Mat(origin_h, origin_w, CV_32FC1, - res.ptr() + i * origin_h * origin_w * FDDataTypeSize(mat->Type())), + res.ptr() + i * origin_h * origin_w * 4), 0); } mat->SetMat(res); From 4ca1a7e7eb6ccb4fe2222d0927b8fdf7af1031b9 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Fri, 25 Nov 2022 02:31:13 +0000 Subject: [PATCH 48/50] deal with comments --- fastdeploy/vision/common/result.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index f3b24c09eab..ee137604891 100755 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -395,16 +395,12 @@ void MattingResult::Reserve(int size) { } void MattingResult::Resize(int size) { - if (alpha.capacity() < size) { - alpha.resize(size); - } + alpha.resize(size); if (contain_foreground) { FDASSERT((shape.size() == 3), "Please initial shape (h,w,c) before call Resize."); int c = static_cast<int>(shape[2]); - if (foreground.capacity() < size * c) { - foreground.resize(size * c); - } + foreground.resize(size * c); } } From 44d5ae720b875ffba8042941fd2e9c935ddd4559 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sat, 3 Dec 2022 11:17:41 +0000 Subject: [PATCH 49/50] fixed bugs --- .../paddledetection/cpp/infer_picodet.cc | 15 ++++++--------- .../paddledetection/cpp/infer_ppyoloe.cc | 6 +++--- 2 files changed, 9 insertions(+), 12 deletions(-) mode change 100644 => 100755 examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc diff --git a/examples/vision/detection/paddledetection/cpp/infer_picodet.cc b/examples/vision/detection/paddledetection/cpp/infer_picodet.cc index
9ecd49e023a..9e71d88c4d9 100644 --- a/examples/vision/detection/paddledetection/cpp/infer_picodet.cc +++ b/examples/vision/detection/paddledetection/cpp/infer_picodet.cc @@ -34,16 +34,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5); + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -63,16 +62,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5); + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -93,16 +91,15 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5); + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc b/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc old mode 100644 new mode 100755 index 4559179387f..7ac11f23387 --- a/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc +++ b/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc @@ -36,7 +36,7 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) { auto im = cv::imread(image_file); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } @@ -64,7 +64,7 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) { auto im = cv::imread(image_file); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } @@ -93,7 +93,7 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) { auto im = cv::imread(image_file); fastdeploy::vision::DetectionResult res; - if (!model.Predict(&im, &res)) { + if (!model.Predict(im, &res)) { std::cerr << "Failed to predict." 
<< std::endl; return; } From ed617b984d1d56acef229c38195b742a23341795 Mon Sep 17 00:00:00 2001 From: wjj19950828 Date: Sat, 3 Dec 2022 12:05:11 +0000 Subject: [PATCH 50/50] add gpu ov for benchmark --- benchmark/benchmark_ppcls.py | 5 +++++ benchmark/benchmark_ppdet.py | 11 +++++++++++ benchmark/benchmark_ppseg.py | 5 +++++ benchmark/benchmark_yolo.py | 5 +++++ fastdeploy/backends/openvino/ov_backend.cc | 4 ++-- 5 files changed, 28 insertions(+), 2 deletions(-) mode change 100644 => 100755 fastdeploy/backends/openvino/ov_backend.cc diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py index 8eeeb8cfca0..b4cbcd8c66f 100755 --- a/benchmark/benchmark_ppcls.py +++ b/benchmark/benchmark_ppcls.py @@ -75,6 +75,11 @@ def build_option(args): option.use_ort_backend() elif backend == "paddle": option.use_paddle_backend() + elif backend == "ov": + option.use_openvino_backend() + option.set_openvino_device(name="GPU") + # change name and shape for models + option.set_openvino_shape_info({"x": [1, 3, 224, 224]}) elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index 6d08aafb8a8..1a2297b4f82 100755 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -75,6 +75,17 @@ def build_option(args): option.use_ort_backend() elif backend == "paddle": option.use_paddle_backend() + elif backend == "ov": + option.use_openvino_backend() + # Using GPU and CPU heterogeneous execution mode + option.set_openvino_device("HETERO:GPU,CPU") + # change name and shape for models + option.set_openvino_shape_info({ + "image": [1, 3, 320, 320], + "scale_factor": [1, 2] + }) + # Set CPU up operator + option.set_openvino_cpu_operators(["MulticlassNms"]) elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py index 7d9df9f0778..b146510d614 100755 --- a/benchmark/benchmark_ppseg.py +++ b/benchmark/benchmark_ppseg.py @@ -75,6 +75,11 @@ def build_option(args): option.use_ort_backend() elif backend == "paddle": option.use_paddle_backend() + elif backend == "ov": + option.use_openvino_backend() + option.set_openvino_device(name="GPU") # use gpu + # change name and shape for models + option.set_openvino_shape_info({"x": [1, 3, 512, 512]}) elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py index dd63cefb65a..a90bcab3de3 100755 --- a/benchmark/benchmark_yolo.py +++ b/benchmark/benchmark_yolo.py @@ -75,6 +75,11 @@ def build_option(args): option.use_ort_backend() elif backend == "paddle": option.use_paddle_backend() + elif backend == "ov": + option.use_openvino_backend() + option.set_openvino_device(name="GPU") + # change name and shape for models + option.set_openvino_shape_info({"images": [1, 3, 640, 640]}) elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": diff --git a/fastdeploy/backends/openvino/ov_backend.cc b/fastdeploy/backends/openvino/ov_backend.cc old mode 100644 new mode 100755 index 9e8c2571aba..6858f85471c --- a/fastdeploy/backends/openvino/ov_backend.cc +++ b/fastdeploy/backends/openvino/ov_backend.cc @@ -176,7 +176,7 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file, } ov::AnyMap properties; - if (option_.cpu_thread_num > 0) { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { 
properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; } if (option_.device == "CPU") { @@ -306,7 +306,7 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file, } ov::AnyMap properties; - if (option_.cpu_thread_num > 0) { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; } if (option_.device == "CPU") {