diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py index 8f7033db4f7..6544c7e6011 100755 --- a/benchmark/benchmark_ppdet.py +++ b/benchmark/benchmark_ppdet.py @@ -17,7 +17,7 @@ import os import numpy as np import time - +from tqdm import tqdm def parse_arguments(): import argparse @@ -263,6 +263,9 @@ def cpu_stat_func(self, q, pid, interval=0.0): elif "yolov3" in args.model: model = fd.vision.detection.YOLOv3( model_file, params_file, config_file, runtime_option=option) + elif "yolov8" in args.model: + model = fd.vision.detection.PaddleYOLOv8( + model_file, params_file, config_file, runtime_option=option) elif "ppyolo_r50vd_dcn_1x_coco" in args.model or "ppyolov2_r101vd_dcn_365e_coco" in args.model: model = fd.vision.detection.PPYOLO( model_file, params_file, config_file, runtime_option=option) @@ -284,7 +287,7 @@ def cpu_stat_func(self, q, pid, interval=0.0): model.enable_record_time_of_runtime() im_ori = cv2.imread(args.image) - for i in range(args.iter_num): + for i in tqdm(range(args.iter_num)): im = im_ori start = time.time() result = model.predict(im) diff --git a/docs/cn/faq/rknpu2/rknpu2.md b/docs/cn/faq/rknpu2/rknpu2.md index fcd3e7c35c2..81f35bd431d 100644 --- a/docs/cn/faq/rknpu2/rknpu2.md +++ b/docs/cn/faq/rknpu2/rknpu2.md @@ -13,14 +13,22 @@ ONNX模型不能直接调用RK芯片中的NPU进行运算,需要把ONNX模型 * ARM CPU使用ONNX框架进行测试 * NPU均使用单核进行测试 -| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | ARM CPU/RKNN速度(ms) | -|----------------|------------------------------------------------------------------------------------------|--------------------------|--------------------| -| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 | -| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 | -| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | -| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | -| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- | -| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 | -| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 | -| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 | -| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 | +| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | ARM CPU/RKNN速度(ms) | +|----------------------|------------------------------------------------------------------------------------------|--------------------------|--------------------| +| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 | +| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 | +| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | +| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | +| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- | +| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 | +| Segmentation | 
[PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 | +| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 | +| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 81/12 | +| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 | + +## 预编译库下载 + +为了方便大家进行开发,这里提供1.0.2版本的FastDeploy给大家使用 + +- [FastDeploy RK356X c++ SDK](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-rk356X-1.0.2.tgz) +- [FastDeploy RK3588 c++ SDK](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-rk3588-1.0.2.tgz) diff --git a/examples/audio/silero-vad/README.md b/examples/audio/silero-vad/README.md new file mode 100644 index 00000000000..8b7e21f4ef3 --- /dev/null +++ b/examples/audio/silero-vad/README.md @@ -0,0 +1,41 @@ +English | [简体中文](README_CN.md) + +# Silero VAD - pre-trained enterprise-grade Voice Activity Detector + +The deployment model comes from [silero-vad](https://github.com/snakers4/silero-vad) + +![](https://user-images.githubusercontent.com/36505480/198026365-8da383e0-5398-4a12-b7f8-22c2c0059512.png) + +## Key Features + +* Stellar accuracy + +Silero VAD has excellent results on speech detection tasks. + +* Fast + +One audio chunk (30+ ms) takes less than 1ms to be processed on a single CPU thread. Using batching or GPU can also improve performance considerably. + +* General + +Silero VAD was trained on huge corpora that include over 100 languages and it performs well on audios from different domains with various background noise and quality levels. + +* Flexible sampling rate + +Silero VAD supports 8000 Hz and 16000 Hz sampling rates. + +## Download Pre-trained ONNX Model + +For developers' testing, model exported by VAD are provided below. Developers can download them directly. 
+ +| 模型 | 大小 | 备注 | +| :----------------------------------------------------------- | :---- | :----------------------------------------------------------- | +| [silero-vad](https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz) | 1.8MB | This model file is sourced from [snakers4/silero-vad](https://github.com/snakers4/silero-vad),MIT License | + +## Detailed Deployment Documents + +- [C++ deployment](cpp) + +## Source + +[https://github.com/snakers4/silero-vad](https://github.com/snakers4/silero-vad) diff --git a/examples/audio/silero-vad/README_CN.md b/examples/audio/silero-vad/README_CN.md new file mode 100644 index 00000000000..81825cf647c --- /dev/null +++ b/examples/audio/silero-vad/README_CN.md @@ -0,0 +1,40 @@ +简体中文 | [English](README.md) + +# Silero VAD 预训练的企业级语音活动检测器 + +该部署模型来自于 [silero-vad](https://github.com/snakers4/silero-vad) + +![](https://user-images.githubusercontent.com/36505480/198026365-8da383e0-5398-4a12-b7f8-22c2c0059512.png) + +## 主要特征 + +* 高准确率 + +Silero VAD在语音检测任务上有着优异的成绩。 + +* 快速推理 + +一个音频块(30+ 毫秒)在单个 CPU 线程上处理时间不到 1毫秒。 + +* 通用性 + +Silero VAD 在包含100多种语言的庞大语料库上进行了训练,它在来自不同领域、具有不同背景噪音和质量水平的音频上表现良好。 + +* 灵活采样率 + +Silero VAD支持 8000 Hz和16000 Hz 采样率。 + +## 下载预训练ONNX模型 + +为了方便开发者的测试,下面提供了 VAD 导出模型,开发者可直接下载使用。 +| 模型 | 大小 | 备注 | +| :----------------------------------------------------------- | :---- | :----------------------------------------------------------- | +| [silero-vad](https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz) | 1.8MB | 此模型文件来源于[snakers4/silero-vad](https://github.com/snakers4/silero-vad),MIT License | + +## 详细部署文档 + +- [C++ 部署](cpp) + +## 模型来源 + +[https://github.com/snakers4/silero-vad](https://github.com/snakers4/silero-vad) diff --git a/examples/audio/silero-vad/cpp/CMakeLists.txt b/examples/audio/silero-vad/cpp/CMakeLists.txt new file mode 100644 index 00000000000..004d1931681 --- /dev/null +++ b/examples/audio/silero-vad/cpp/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.23) +project(silero_vad) + +set(CMAKE_CXX_STANDARD 11) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_onnx_silero_vad ${PROJECT_SOURCE_DIR}/infer_onnx_silero_vad.cc wav.h vad.cc vad.h) + +# 添加FastDeploy库依赖 +target_link_libraries(infer_onnx_silero_vad ${FASTDEPLOY_LIBS}) diff --git a/examples/audio/silero-vad/cpp/README.md b/examples/audio/silero-vad/cpp/README.md new file mode 100644 index 00000000000..f032be86230 --- /dev/null +++ b/examples/audio/silero-vad/cpp/README.md @@ -0,0 +1,121 @@ +English | [简体中文](README_CN.md) + +# Silero VAD Deployment Example + +This directory provides examples that `infer_onnx_silero_vad` fast finishes the deployment of VAD models on CPU/GPU. + +Before deployment, two steps require confirmation. + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md). +- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md). 
+ +Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. + +```bash +mkdir build +cd build +# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory +wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav + +# inference +./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav +``` + +- The above command works for Linux or MacOS. Refer to: + - [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows + +## VAD C++ Interface + +### Vad Class + +```c++ +Vad::Vad(const std::string& model_file, + const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()) +``` + +**Parameter** + +> * **model_file**(str): Model file path +> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration) + +### setAudioCofig function + +**Must be called before the `init` function** + +```c++ +void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms); +``` + +**Parameter** + +> * **sr**(int): sampling rate +> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size +> * **threshold**(float): Result probability judgment threshold +> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence +> * **speech_pad_ms**(int): Used to calculate the end time of the speech + +### init function + +Used to initialize audio-related parameters. + +```c++ +void Vad::init(); +``` + +### loadAudio function + +Load audio. + +```c++ +void Vad::loadAudio(const std::string& wavPath) +``` + +**Parameter** + +> * **wavPath**(str): Audio file path + +### Predict function + +Used to start model reasoning. + +```c++ +bool Vad::Predict(); +``` + +### getResult function + +**Used to obtain reasoning results** + +```c++ +std::vector> Vad::getResult( + float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0, + float mergeThreshold = 0.3); +``` + +**Parameter** + +> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold +> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly +> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly +> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly + +**The output result format is**`std::vector>` + +> Output a list, each element is a speech fragment +> +> Each clip can use 'start' to get the start time and 'end' to get the end time + +### Tips + +1. 
`The setAudioCofig`function must be called before the `init` function +2. The sampling rate of the input audio file must be consistent with that set in the code + +- [Model Description](../) +- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/audio/silero-vad/cpp/README_CN.md b/examples/audio/silero-vad/cpp/README_CN.md new file mode 100644 index 00000000000..c45d9896c38 --- /dev/null +++ b/examples/audio/silero-vad/cpp/README_CN.md @@ -0,0 +1,119 @@ +[English](README.md) | 简体中文 +# Silero VAD 部署示例 + +本目录下提供`infer_onnx_silero_vad`快速完成 Silero VAD 模型在CPU/GPU。 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +以Linux上 VAD 推理为例,在本目录执行如下命令即可完成编译测试。 + +```bash +mkdir build +cd build +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载 VAD 模型文件和测试音频,解压后将模型和测试音频放置在与 infer_onnx_silero_vad.cc 同级目录下 +wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav + +# 推理 +./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav +``` + +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../docs/cn/faq/use_sdk_on_windows.md) + +## VAD C++ 接口 +### Vad 类 + +```c++ +Vad::Vad(const std::string& model_file, + const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()) +``` + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 + +### setAudioCofig 函数 + +**必须在`init`函数前调用** + +```c++ +void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms); +``` + +**参数** + +> * **sr**(int): 采样率 +> * **frame_ms**(int): 每次检测帧长,用于计算检测窗口大小 +> * **threshold**(float): 结果概率判断阈值 +> * **min_silence_duration_ms**(int): 用于计算判断是否是 silence 的阈值 +> * **speech_pad_ms**(int): 用于计算 speach 结束时刻 + +### init 函数 + +用于初始化音频相关参数 + +```c++ +void Vad::init(); +``` + +### loadAudio 函数 + +加载音频 + +```c++ +void Vad::loadAudio(const std::string& wavPath) +``` + +**参数** + +> * **wavPath**(str): 音频文件路径 + +### Predict 函数 + +用于开始模型推理 + +```c++ +bool Vad::Predict(); +``` + +### getResult 函数 + +**用于获取推理结果** + +```c++ +std::vector> Vad::getResult( + float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0, + float mergeThreshold = 0.3); +``` + +**参数** + +> * **removeThreshold**(float): 丢弃结果片段阈值;部分识别结果太短则根据此阈值丢弃 +> * **expandHeadThreshold**(float): 结果片段开始时刻偏移;识别到的开始时刻可能过于贴近发声部分,因此据此前移开始时刻 +> * **expandTailThreshold**(float): 结果片段结束时刻偏移;识别到的结束时刻可能过于贴近发声部分,因此据此后移结束时刻 +> * **mergeThreshold**(float): 有的结果片段十分靠近,可以合并成一个,据此合并发声片段 + +**输出结果格式为**`std::vector>` + +> 输出一个列表,每个元素是一个讲话片段 +> +> 每个片段可以用 'start' 获取到开始时刻,用 'end' 获取到结束时刻 + +### 提示 + +1. `setAudioCofig`函数必须在`init`函数前调用 +2. 
输入的音频文件的采样率必须与代码中设置的保持一致 + +- [模型介绍](../) +- [如何切换模型推理后端引擎](../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/audio/silero-vad/cpp/infer_onnx_silero_vad.cc b/examples/audio/silero-vad/cpp/infer_onnx_silero_vad.cc new file mode 100644 index 00000000000..a23898550c3 --- /dev/null +++ b/examples/audio/silero-vad/cpp/infer_onnx_silero_vad.cc @@ -0,0 +1,29 @@ +#include + +#include "vad.h" + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio " + "run_option, " + "e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav" + << std::endl; + return -1; + } + + std::string model_file = argv[1]; + std::string audio_file = argv[2]; + + Vad vad(model_file); + // custom config, but must be set before init + // vad.setAudioCofig(16000, 64, 0.5f, 0, 0); + vad.init(); + vad.loadAudio(audio_file); + vad.Predict(); + std::vector> result = vad.getResult(); + for (auto& res : result) { + std::cout << "speak start: " << res["start"] << " s, end: " << res["end"] + << " s" << std::endl; + } + return 0; +} diff --git a/examples/audio/silero-vad/cpp/vad.cc b/examples/audio/silero-vad/cpp/vad.cc new file mode 100644 index 00000000000..5b451605905 --- /dev/null +++ b/examples/audio/silero-vad/cpp/vad.cc @@ -0,0 +1,258 @@ +// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
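+
+// vad.cc implements a small windowed VAD pipeline: the input wav is loaded
+// with wav::WavReader and normalized to floats in [-1, 1], the samples are
+// split into fixed-size windows, and each window is run through the
+// silero-vad ONNX model via the FastDeploy runtime. Per-window speech
+// probabilities are turned into start/end timestamps in Postprocess() and
+// cleaned up (short-segment removal, padding, merging) in getResult().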
+#include "vad.h" + +int Vad::getSampleRate() const { return sample_rate_; } + +int Vad::getFrameMs() const { return frame_ms_; } + +float Vad::getThreshold() const { return threshold_; } + +int Vad::getMinSilenceDurationMs() const { return min_silence_duration_ms_; } + +int Vad::getSpeechPadMs() const { return speech_pad_ms_; } + +const wav::WavReader &Vad::getWavReader() const { return wavReader_; } + +const std::vector &Vad::getData() const { return data_; } + +const std::vector &Vad::getInputWav() const { return inputWav_; } + +int64_t Vad::getWindowSizeSamples() const { return window_size_samples_; } + +int Vad::getSrPerMs() const { return sr_per_ms_; } + +int Vad::getMinSilenceSamples() const { return min_silence_samples_; } + +int Vad::getSpeechPadSamples() const { return speech_pad_samples_; } + +std::string Vad::ModelName() const { return "VAD"; } + +void Vad::loadAudio(const std::string &wavPath) { + wavReader_ = wav::WavReader(wavPath); + data_.reserve(wavReader_.num_samples()); + inputWav_.reserve(wavReader_.num_samples()); + + for (int i = 0; i < wavReader_.num_samples(); i++) { + data_[i] = static_cast(*(wavReader_.data() + i)); + } + + for (int i = 0; i < wavReader_.num_samples(); i++) { + inputWav_[i] = static_cast(data_[i]) / 32768; + } +} + +bool Vad::Initialize() { + // initAudioConfig + sr_per_ms_ = sample_rate_ / 1000; + min_silence_samples_ = sr_per_ms_ * min_silence_duration_ms_; + speech_pad_samples_ = sr_per_ms_ * speech_pad_ms_; + window_size_samples_ = frame_ms_ * sr_per_ms_; + + // initInputConfig + input_.resize(window_size_samples_); + input_node_dims_.emplace_back(1); + input_node_dims_.emplace_back(window_size_samples_); + + _h.resize(size_hc_); + _c.resize(size_hc_); + sr_.resize(1); + sr_[0] = sample_rate_; + + // InitRuntime + if (!InitRuntime()) { + fastdeploy::FDERROR << "Failed to initialize fastdeploy backend." 
+ << std::endl; + return false; + } + return true; +} + +void Vad::setAudioCofig(int sr, int frame_ms, float threshold, + int min_silence_duration_ms, int speech_pad_ms) { + if (initialized) { + fastdeploy::FDERROR << "setAudioCofig must be called before init" + << std::endl; + throw std::runtime_error("setAudioCofig must be called before init"); + } + sample_rate_ = sr; + Vad::frame_ms_ = frame_ms; + Vad::threshold_ = threshold; + Vad::min_silence_duration_ms_ = min_silence_duration_ms; + Vad::speech_pad_ms_ = speech_pad_ms; +} + +bool Vad::Preprocess(std::vector audioWindowData) { + fastdeploy::FDTensor inputTensor, srTensor, hTensor, cTensor; + inputTensor.SetExternalData(input_node_dims_, fastdeploy::FDDataType::FP32, + audioWindowData.data()); + inputTensor.name = "input"; + srTensor.SetExternalData(sr_node_dims_, fastdeploy::FDDataType::INT64, + sr_.data()); + srTensor.name = "sr"; + hTensor.SetExternalData(hc_node_dims_, fastdeploy::FDDataType::FP32, + _h.data()); + hTensor.name = "h"; + cTensor.SetExternalData(hc_node_dims_, fastdeploy::FDDataType::FP32, + _c.data()); + cTensor.name = "c"; + + inputTensors_.clear(); + inputTensors_.emplace_back(inputTensor); + inputTensors_.emplace_back(srTensor); + inputTensors_.emplace_back(hTensor); + inputTensors_.emplace_back(cTensor); + return true; +} + +bool Vad::Predict() { + if (wavReader_.sample_rate() != sample_rate_) { + fastdeploy::FDINFO << "The sampling rate of the audio file is " << wavReader_.sample_rate() << std::endl; + fastdeploy::FDINFO << "The set sample rate is " << sample_rate_ << std::endl; + fastdeploy::FDERROR << "The sampling rate of the audio file is not equal " + "to the sampling rate set by the program. " + << "Please make it equal. " + << "You can modify the audio file sampling rate, " + << "or use setAudioCofig to modify the program's " + "sampling rate and other configurations." + << std::endl; + throw std::runtime_error( + "The sampling rate of the audio file is not equal to the sampling rate " + "set by the program."); + } + for (int64_t j = 0; j < wavReader_.num_samples(); j += window_size_samples_) { + std::vector r{&inputWav_[0] + j, + &inputWav_[0] + j + window_size_samples_}; + Preprocess(r); + if (!Infer(inputTensors_, &outputTensors_)) { + fastdeploy::FDERROR << "Failed to inference while using model:" + << ModelName() << "." << std::endl; + return false; + } + Postprocess(); + } + return true; +} + +bool Vad::Postprocess() { + // update prob, h, c + outputProb_ = *(float *)outputTensors_[0].Data(); + auto *hn = static_cast(outputTensors_[1].MutableData()); + std::memcpy(_h.data(), hn, size_hc_ * sizeof(float)); + auto *cn = static_cast(outputTensors_[2].MutableData()); + std::memcpy(_c.data(), cn, size_hc_ * sizeof(float)); + + // Push forward sample index + current_sample_ += window_size_samples_; + + if (outputProb_ >= threshold_ && temp_end_) { + // Reset temp_end_ when > threshold_ + temp_end_ = 0; + } + if (outputProb_ < threshold_ && !triggerd_) { + // 1) Silence + // printf("{ silence: %.3f s }\n", 1.0 * current_sample_ / sample_rate_); + } + if (outputProb_ >= threshold_ - 0.15 && triggerd_) { + // 2) Speaking + // printf("{ speaking_2: %.3f s }\n", 1.0 * current_sample_ / sample_rate_); + } + if (outputProb_ >= threshold_ && !triggerd_) { + // 3) Start + triggerd_ = true; + speech_start_ = current_sample_ - window_size_samples_ - + speech_pad_samples_; // minus window_size_samples_ to get + // precise start time point. 
+ // printf("{ start: %.5f s }\n", 1.0 * speech_start_ / sample_rate_); + speakStart_.emplace_back(1.0 * speech_start_ / sample_rate_); + } + if (outputProb_ < threshold_ - 0.15 && triggerd_) { + // 4) End + if (temp_end_ != 0) { + temp_end_ = current_sample_; + } + if (current_sample_ - temp_end_ < min_silence_samples_) { + // a. silence < min_slience_samples, continue speaking + // printf("{ speaking_4: %.3f s }\n", 1.0 * current_sample_ / sample_rate_); + // printf(""); + } else { + // b. silence >= min_slience_samples, end speaking + speech_end_ = current_sample_ + speech_pad_samples_; + temp_end_ = 0; + triggerd_ = false; + // printf("{ end: %.5f s }\n", 1.0 * speech_end_ / sample_rate_); + speakEnd_.emplace_back(1.0 * speech_end_ / sample_rate_); + } + } + + return true; +} + +std::vector> Vad::getResult( + float removeThreshold, float expandHeadThreshold, float expandTailThreshold, + float mergeThreshold) { + float audioLength = 1.0 * wavReader_.num_samples() / sample_rate_; + if (speakStart_.empty() && speakEnd_.empty()) { + return {}; + } + if (speakEnd_.size() != speakStart_.size()) { + // set the audio length as the last end + speakEnd_.emplace_back(audioLength); + } + // Remove too short segments + auto startIter = speakStart_.begin(); + auto endIter = speakEnd_.begin(); + while (startIter != speakStart_.end()) { + if (removeThreshold < audioLength && + *endIter - *startIter < removeThreshold) { + startIter = speakStart_.erase(startIter); + endIter = speakEnd_.erase(endIter); + } else { + startIter++; + endIter++; + } + } + // Expand to avoid to tight cut. + startIter = speakStart_.begin(); + endIter = speakEnd_.begin(); + *startIter = std::fmax(0.f, *startIter - expandHeadThreshold); + *endIter = std::fmin(*endIter + expandTailThreshold, *(startIter + 1)); + endIter = speakEnd_.end() - 1; + startIter = speakStart_.end() - 1; + *startIter = fmax(*startIter - expandHeadThreshold, *(endIter - 1)); + *endIter = std::fmin(*endIter + expandTailThreshold, audioLength); + for (int i = 1; i < speakStart_.size() - 1; ++i) { + speakStart_[i] = std::fmax(speakStart_[i] - expandHeadThreshold, speakEnd_[i - 1]); + speakEnd_[i] = std::fmin(speakEnd_[i] + expandTailThreshold, speakStart_[i + 1]); + } + // Merge very closed segments + startIter = speakStart_.begin() + 1; + endIter = speakEnd_.begin(); + while (startIter != speakStart_.end()) { + if (*startIter - *endIter < mergeThreshold) { + startIter = speakStart_.erase(startIter); + endIter = speakEnd_.erase(endIter); + } else { + startIter++; + endIter++; + } + } + + std::vector> result; + for (int i = 0; i < speakStart_.size(); ++i) { + result.emplace_back(std::map( + {{"start", speakStart_[i]}, {"end", speakEnd_[i]}})); + } + return result; +} diff --git a/examples/audio/silero-vad/cpp/vad.h b/examples/audio/silero-vad/cpp/vad.h new file mode 100644 index 00000000000..322e98a4812 --- /dev/null +++ b/examples/audio/silero-vad/cpp/vad.h @@ -0,0 +1,141 @@ +// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include + +#include "fastdeploy/runtime.h" +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/vision/utils/utils.h" +#include "wav.h" + +class Vad:public fastdeploy::FastDeployModel{ + public: + std::string ModelName() const override; + + Vad(const std::string& model_file, + const fastdeploy::RuntimeOption& custom_option = + fastdeploy::RuntimeOption()) { + valid_cpu_backends = {fastdeploy::Backend::ORT, + fastdeploy::Backend::OPENVINO}; + valid_gpu_backends = {fastdeploy::Backend::ORT, + fastdeploy::Backend::TRT}; + + runtime_option = custom_option; + runtime_option.model_format = fastdeploy::ModelFormat::ONNX; + runtime_option.model_file = model_file; + runtime_option.params_file = ""; + } + + void init() { + initialized = Initialize(); + } + + void setAudioCofig( + int sr, int frame_ms, float threshold, + int min_silence_duration_ms, int speech_pad_ms); + + void loadAudio(const std::string& wavPath); + + bool Predict(); + + std::vector> getResult( + float removeThreshold = 1.6, + float expandHeadThreshold = 0.32, float expandTailThreshold = 0, + float mergeThreshold = 0.3); + + private: + bool Initialize(); + + bool Preprocess(std::vector audioWindowData); + + bool Postprocess(); + + private: + // model + std::vector inputTensors_; + std::vector outputTensors_; + // model states + bool triggerd_ = false; + unsigned int speech_start_ = 0; + unsigned int speech_end_ = 0; + unsigned int temp_end_ = 0; + unsigned int current_sample_ = 0; + // MAX 4294967295 samples / 8sample per ms / 1000 / 60 = 8947 minutes + float outputProb_; + + /* ======================================================================== */ + + // input wav data + wav::WavReader wavReader_; + std::vector data_; + std::vector inputWav_; + + /* ======================================================================== */ + + // audio config + int sample_rate_ = 16000; + int frame_ms_ = 64; + float threshold_ = 0.5f; + int min_silence_duration_ms_ = 0; + int speech_pad_ms_ = 0; + + int64_t window_size_samples_; + // Assign when init, support 256 512 768 for 8k; 512 1024 1536 for 16k. + int sr_per_ms_; // Assign when init, support 8 or 16 + int min_silence_samples_; // sr_per_ms_ * #ms + int speech_pad_samples_; // usually a + + /* ======================================================================== */ + + std::vector input_; + std::vector sr_; + unsigned int size_hc_ = 2 * 1 * 64; // It's FIXED. 
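+  // Recurrent state tensors ("h" and "c") passed to the model with every
+  // window and refreshed from the model outputs in Postprocess().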
+ std::vector _h; + std::vector _c; + + std::vector input_node_dims_; + const std::vector sr_node_dims_ = {1}; + const std::vector hc_node_dims_ = {2, 1, 64}; + + /* ======================================================================== */ + + std::vector speakStart_; + std::vector speakEnd_; + + public: + int getSampleRate() const; + + int getFrameMs() const; + + float getThreshold() const; + + int getMinSilenceDurationMs() const; + + int getSpeechPadMs() const; + + const wav::WavReader &getWavReader() const; + + const std::vector &getData() const; + + const std::vector &getInputWav() const; + + int64_t getWindowSizeSamples() const; + + int getSrPerMs() const; + + int getMinSilenceSamples() const; + + int getSpeechPadSamples() const; +}; diff --git a/examples/audio/silero-vad/cpp/wav.h b/examples/audio/silero-vad/cpp/wav.h new file mode 100644 index 00000000000..adb81761c94 --- /dev/null +++ b/examples/audio/silero-vad/cpp/wav.h @@ -0,0 +1,193 @@ +// Copyright (c) 2016 Personal (Binbin Zhang) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace wav { + +struct WavHeader { + char riff[4]; // "riff" + unsigned int size; + char wav[4]; // "WAVE" + char fmt[4]; // "fmt " + unsigned int fmt_size; + uint16_t format; + uint16_t channels; + unsigned int sample_rate; + unsigned int bytes_per_second; + uint16_t block_size; + uint16_t bit; + char data[4]; // "data" + unsigned int data_size; +}; + +class WavReader { + public: + WavReader() : data_(nullptr) {} + explicit WavReader(const std::string& filename) { Open(filename); } + + bool Open(const std::string& filename) { + FILE* fp = fopen(filename.c_str(), "rb"); + if (NULL == fp) { + std::cout << "Error in read " << filename; + return false; + } + + WavHeader header; + fread(&header, 1, sizeof(header), fp); + if (header.fmt_size < 16) { + fprintf(stderr, + "WaveData: expect PCM format data " + "to have fmt chunk of at least size 16.\n"); + return false; + } else if (header.fmt_size > 16) { + int offset = 44 - 8 + header.fmt_size - 16; + fseek(fp, offset, SEEK_SET); + fread(header.data, 8, sizeof(char), fp); + } + // check "riff" "WAVE" "fmt " "data" + + // Skip any sub-chunks between "fmt" and "data". Usually there will + // be a single "fact" sub chunk, but on Windows there can also be a + // "list" sub chunk. + while (0 != strncmp(header.data, "data", 4)) { + // We will just ignore the data in these chunks. 
+ fseek(fp, header.data_size, SEEK_CUR); + // read next sub chunk + fread(header.data, 8, sizeof(char), fp); + } + + num_channel_ = header.channels; + sample_rate_ = header.sample_rate; + bits_per_sample_ = header.bit; + int num_data = header.data_size / (bits_per_sample_ / 8); + data_ = new float[num_data]; // Create 1-dim array + num_samples_ = num_data / num_channel_; + + for (int i = 0; i < num_data; ++i) { + switch (bits_per_sample_) { + case 8: { + char sample; + fread(&sample, 1, sizeof(char), fp); + data_[i] = static_cast(sample); + break; + } + case 16: { + int16_t sample; + fread(&sample, 1, sizeof(int16_t), fp); + // std::cout << sample; + data_[i] = static_cast(sample); + // std::cout << data_[i]; + break; + } + case 32: { + int sample; + fread(&sample, 1, sizeof(int), fp); + data_[i] = static_cast(sample); + break; + } + default: + fprintf(stderr, "unsupported quantization bits"); + exit(1); + } + } + fclose(fp); + return true; + } + + int num_channel() const { return num_channel_; } + int sample_rate() const { return sample_rate_; } + int bits_per_sample() const { return bits_per_sample_; } + int num_samples() const { return num_samples_; } + const float* data() const { return data_; } + + private: + int num_channel_; + int sample_rate_; + int bits_per_sample_; + int num_samples_; // sample points per channel + float* data_; +}; + +class WavWriter { + public: + WavWriter(const float* data, int num_samples, int num_channel, + int sample_rate, int bits_per_sample) + : data_(data), + num_samples_(num_samples), + num_channel_(num_channel), + sample_rate_(sample_rate), + bits_per_sample_(bits_per_sample) {} + + void Write(const std::string& filename) { + FILE* fp = fopen(filename.c_str(), "w"); + // init char 'riff' 'WAVE' 'fmt ' 'data' + WavHeader header; + char wav_header[44] = {0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, + 0x41, 0x56, 0x45, 0x66, 0x6d, 0x74, 0x20, 0x10, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00}; + memcpy(&header, wav_header, sizeof(header)); + header.channels = num_channel_; + header.bit = bits_per_sample_; + header.sample_rate = sample_rate_; + header.data_size = num_samples_ * num_channel_ * (bits_per_sample_ / 8); + header.size = sizeof(header) - 8 + header.data_size; + header.bytes_per_second = + sample_rate_ * num_channel_ * (bits_per_sample_ / 8); + header.block_size = num_channel_ * (bits_per_sample_ / 8); + + fwrite(&header, 1, sizeof(header), fp); + + for (int i = 0; i < num_samples_; ++i) { + for (int j = 0; j < num_channel_; ++j) { + switch (bits_per_sample_) { + case 8: { + char sample = static_cast(data_[i * num_channel_ + j]); + fwrite(&sample, 1, sizeof(sample), fp); + break; + } + case 16: { + int16_t sample = static_cast(data_[i * num_channel_ + j]); + fwrite(&sample, 1, sizeof(sample), fp); + break; + } + case 32: { + int sample = static_cast(data_[i * num_channel_ + j]); + fwrite(&sample, 1, sizeof(sample), fp); + break; + } + } + } + } + fclose(fp); + } + + private: + const float* data_; + int num_samples_; // total float points in data_ + int num_channel_; + int sample_rate_; + int bits_per_sample_; +}; + +} // namespace wav diff --git a/examples/vision/facedet/scrfd/cpp/CMakeLists.txt b/examples/vision/facedet/scrfd/cpp/CMakeLists.txt index 93540a7e83e..41516387891 100644 --- a/examples/vision/facedet/scrfd/cpp/CMakeLists.txt +++ b/examples/vision/facedet/scrfd/cpp/CMakeLists.txt @@ -1,14 +1,15 @@ 
PROJECT(infer_demo C CXX) CMAKE_MINIMUM_REQUIRED (VERSION 3.10) -# 指定下载解压后的fastdeploy库路径 option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) -# 添加FastDeploy依赖头文件 + include_directories(${FASTDEPLOY_INCS}) -add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) -# 添加FastDeploy库依赖 -target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) +add_executable(infer_with_face_align_demo ${PROJECT_SOURCE_DIR}/infer_with_face_align.cc) +target_link_libraries(infer_with_face_align_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_without_face_align_demo ${PROJECT_SOURCE_DIR}/infer_without_face_align.cc) +target_link_libraries(infer_without_face_align_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/facedet/scrfd/cpp/README_CN.md b/examples/vision/facedet/scrfd/cpp/README_CN.md index 1c01173b247..b4e0257630b 100644 --- a/examples/vision/facedet/scrfd/cpp/README_CN.md +++ b/examples/vision/facedet/scrfd/cpp/README_CN.md @@ -23,13 +23,21 @@ make -j wget https://bj.bcebos.com/paddlehub/fastdeploy/scrfd_500m_bnkps_shape640x640.onnx wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg +# SCRFD +# CPU推理 +./infer_without_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 0 +# GPU推理 +./infer_without_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 1 +# GPU上TensorRT推理 +./infer_without_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 2 +# SCRFD + FaceAlign # CPU推理 -./infer_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 0 +./infer_with_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 0 # GPU推理 -./infer_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 1 +./infer_with_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 1 # GPU上TensorRT推理 -./infer_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 2 +./infer_with_face_align_demo scrfd_500m_bnkps_shape640x640.onnx test_lite_face_detector_3.jpg 2 ``` 运行完成可视化结果如下图所示 diff --git a/examples/vision/facedet/scrfd/cpp/infer_with_face_align.cc b/examples/vision/facedet/scrfd/cpp/infer_with_face_align.cc new file mode 100644 index 00000000000..1a32ac67c8b --- /dev/null +++ b/examples/vision/facedet/scrfd/cpp/infer_with_face_align.cc @@ -0,0 +1,115 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +void CpuInfer(const std::string& model_file, const std::string& image_file) { + auto model = fastdeploy::vision::facedet::SCRFD(model_file); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::FaceDetectionResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + std::cout << res.Str() << std::endl; + + auto vis_im_list = + fastdeploy::vision::utils::AlignFaceWithFivePoints(im, res); + if (!vis_im_list.empty()) { + cv::imwrite("vis_result.jpg", vis_im_list[0]); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } +} + +void GpuInfer(const std::string& model_file, const std::string& image_file) { + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::facedet::SCRFD(model_file, "", option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::FaceDetectionResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + std::cout << res.Str() << std::endl; + + auto vis_im_list = + fastdeploy::vision::utils::AlignFaceWithFivePoints(im, res); + if (!vis_im_list.empty()) { + cv::imwrite("vis_result.jpg", vis_im_list[0]); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } +} + +void TrtInfer(const std::string& model_file, const std::string& image_file) { + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + option.UseTrtBackend(); + option.SetTrtInputShape("images", {1, 3, 640, 640}); + auto model = fastdeploy::vision::facedet::SCRFD(model_file, "", option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::FaceDetectionResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + std::cout << res.Str() << std::endl; + + auto vis_im_list = + fastdeploy::vision::utils::AlignFaceWithFivePoints(im, res); + if (!vis_im_list.empty()) { + cv::imwrite("vis_result.jpg", vis_im_list[0]); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model path/to/image run_option, " + "e.g ./infer_model scrfd_500m_bnkps_shape640x640.onnx ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend." 
+ << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 2) { + TrtInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/facedet/scrfd/cpp/infer.cc b/examples/vision/facedet/scrfd/cpp/infer_without_face_align.cc similarity index 100% rename from examples/vision/facedet/scrfd/cpp/infer.cc rename to examples/vision/facedet/scrfd/cpp/infer_without_face_align.cc diff --git a/examples/vision/faceid/insightface/cpp/README_CN.md b/examples/vision/faceid/insightface/cpp/README_CN.md index ff03789ca2c..7f0dc442c5c 100644 --- a/examples/vision/faceid/insightface/cpp/README_CN.md +++ b/examples/vision/faceid/insightface/cpp/README_CN.md @@ -101,7 +101,7 @@ VPL模型加载和初始化,其中model_file为导出的ONNX模型格式。 #### Predict函数 > ```c++ -> ArcFace::Predict(cv::Mat* im, FaceRecognitionResult* result) +> ArcFace::Predict(const cv::Mat& im, FaceRecognitionResult* result) > ``` > > 模型预测接口,输入图像直接输出检测结果。 @@ -121,8 +121,6 @@ VPL模型加载和初始化,其中model_file为导出的ONNX模型格式。 通过InsightFaceRecognitionPreprocessor::SetAlpha(std::vector& alpha)来进行修改 > > * **beta**(vector<float>): 预处理归一化的beta值,计算公式为`x'=x*alpha+beta`,beta默认为[-1.f, -1.f, -1.f], 通过InsightFaceRecognitionPreprocessor::SetBeta(std::vector& beta)来进行修改 -> > * **permute**(bool): 预处理是否将BGR转换成RGB,默认true, - 通过InsightFaceRecognitionPreprocessor::SetPermute(bool permute)来进行修改 #### InsightFaceRecognitionPostprocessor成员变量(后处理参数) > > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化,默认false, diff --git a/examples/vision/faceid/insightface/python/README_CN.md b/examples/vision/faceid/insightface/python/README_CN.md index b30921b7e7f..b5d444c5f9b 100644 --- a/examples/vision/faceid/insightface/python/README_CN.md +++ b/examples/vision/faceid/insightface/python/README_CN.md @@ -100,7 +100,6 @@ ArcFace模型加载和初始化,其中model_file为导出的ONNX模型格式 > > * **size**(list[int]): 通过此参数修改预处理过程中resize的大小,包含两个整型元素,表示[width, height], 默认值为[112, 112] > > * **alpha**(list[float]): 预处理归一化的alpha值,计算公式为`x'=x*alpha+beta`,alpha默认为[1. / 127.5, 1.f / 127.5, 1. 
/ 127.5] > > * **beta**(list[float]): 预处理归一化的beta值,计算公式为`x'=x*alpha+beta`,beta默认为[-1.f, -1.f, -1.f] -> > * **swap_rb**(bool): 预处理是否将BGR转换成RGB,默认True #### AdaFacePostprocessor的成员变量 以下变量为AdaFacePostprocessor的成员变量 diff --git a/examples/vision/faceid/insightface/python/infer_arcface.py b/examples/vision/faceid/insightface/python/infer_arcface.py index 06e8ef00186..c426a35ca3e 100644 --- a/examples/vision/faceid/insightface/python/infer_arcface.py +++ b/examples/vision/faceid/insightface/python/infer_arcface.py @@ -3,7 +3,6 @@ import numpy as np -# 余弦相似度 def cosine_similarity(a, b): a = np.array(a) b = np.array(b) @@ -56,24 +55,17 @@ def build_option(args): args = parse_arguments() -# 配置runtime,加载模型 runtime_option = build_option(args) model = fd.vision.faceid.ArcFace(args.model, runtime_option=runtime_option) -# 加载图片 face0 = cv2.imread(args.face) # 0,1 同一个人 face1 = cv2.imread(args.face_positive) face2 = cv2.imread(args.face_negative) # 0,2 不同的人 -# 设置 l2 normalize -model.postprocessor.l2_normalize = True - -# 预测图片检测结果 result0 = model.predict(face0) result1 = model.predict(face1) result2 = model.predict(face2) -# 计算余弦相似度 embedding0 = result0.embedding embedding1 = result1.embedding embedding2 = result2.embedding @@ -81,7 +73,6 @@ def build_option(args): cosine01 = cosine_similarity(embedding0, embedding1) cosine02 = cosine_similarity(embedding0, embedding2) -# 打印结果 print(result0, end="") print(result1, end="") print(result2, end="") diff --git a/examples/vision/faceid/insightface/python/infer_cosface.py b/examples/vision/faceid/insightface/python/infer_cosface.py index 2bb1292f492..b316057e0a0 100644 --- a/examples/vision/faceid/insightface/python/infer_cosface.py +++ b/examples/vision/faceid/insightface/python/infer_cosface.py @@ -3,7 +3,6 @@ import numpy as np -# 余弦相似度 def cosine_similarity(a, b): a = np.array(a) b = np.array(b) @@ -56,24 +55,17 @@ def build_option(args): args = parse_arguments() -# 配置runtime,加载模型 runtime_option = build_option(args) model = fd.vision.faceid.CosFace(args.model, runtime_option=runtime_option) -# 加载图片 -face0 = cv2.imread(args.face) # 0,1 同一个人 +face0 = cv2.imread(args.face) face1 = cv2.imread(args.face_positive) -face2 = cv2.imread(args.face_negative) # 0,2 不同的人 +face2 = cv2.imread(args.face_negative) -# 设置 l2 normalize -model.postprocessor.l2_normalize = True - -# 预测图片检测结果 result0 = model.predict(face0) result1 = model.predict(face1) result2 = model.predict(face2) -# 计算余弦相似度 embedding0 = result0.embedding embedding1 = result1.embedding embedding2 = result2.embedding @@ -81,7 +73,6 @@ def build_option(args): cosine01 = cosine_similarity(embedding0, embedding1) cosine02 = cosine_similarity(embedding0, embedding2) -# 打印结果 print(result0, end="") print(result1, end="") print(result2, end="") diff --git a/examples/vision/faceid/insightface/python/infer_partial_fc.py b/examples/vision/faceid/insightface/python/infer_partial_fc.py index e81531e6eec..e979e774cdd 100644 --- a/examples/vision/faceid/insightface/python/infer_partial_fc.py +++ b/examples/vision/faceid/insightface/python/infer_partial_fc.py @@ -3,7 +3,6 @@ import numpy as np -# 余弦相似度 def cosine_similarity(a, b): a = np.array(a) b = np.array(b) @@ -56,24 +55,18 @@ def build_option(args): args = parse_arguments() -# 配置runtime,加载模型 runtime_option = build_option(args) model = fd.vision.faceid.PartialFC(args.model, runtime_option=runtime_option) # 加载图片 -face0 = cv2.imread(args.face) # 0,1 同一个人 +face0 = cv2.imread(args.face) face1 = cv2.imread(args.face_positive) -face2 = cv2.imread(args.face_negative) # 0,2 不同的人 
+face2 = cv2.imread(args.face_negative) -# 设置 l2 normalize -model.postprocessor.l2_normalize = True - -# 预测图片检测结果 result0 = model.predict(face0) result1 = model.predict(face1) result2 = model.predict(face2) -# 计算余弦相似度 embedding0 = result0.embedding embedding1 = result1.embedding embedding2 = result2.embedding @@ -81,7 +74,6 @@ def build_option(args): cosine01 = cosine_similarity(embedding0, embedding1) cosine02 = cosine_similarity(embedding0, embedding2) -# 打印结果 print(result0, end="") print(result1, end="") print(result2, end="") diff --git a/examples/vision/faceid/insightface/python/infer_vpl.py b/examples/vision/faceid/insightface/python/infer_vpl.py index 6113ad3df72..8c6f711f3d4 100644 --- a/examples/vision/faceid/insightface/python/infer_vpl.py +++ b/examples/vision/faceid/insightface/python/infer_vpl.py @@ -3,7 +3,6 @@ import numpy as np -# 余弦相似度 def cosine_similarity(a, b): a = np.array(a) b = np.array(b) @@ -56,24 +55,17 @@ def build_option(args): args = parse_arguments() -# 配置runtime,加载模型 runtime_option = build_option(args) model = fd.vision.faceid.VPL(args.model, runtime_option=runtime_option) -# 加载图片 face0 = cv2.imread(args.face) # 0,1 同一个人 face1 = cv2.imread(args.face_positive) face2 = cv2.imread(args.face_negative) # 0,2 不同的人 -# 设置 l2 normalize -model.postprocessor.l2_normalize = True - -# 预测图片检测结果 result0 = model.predict(face0) result1 = model.predict(face1) result2 = model.predict(face2) -# 计算余弦相似度 embedding0 = result0.embedding embedding1 = result1.embedding embedding2 = result2.embedding @@ -81,7 +73,6 @@ def build_option(args): cosine01 = cosine_similarity(embedding0, embedding1) cosine02 = cosine_similarity(embedding0, embedding2) -# 打印结果 print(result0, end="") print(result1, end="") print(result2, end="") diff --git a/examples/vision/faceid/insightface/rknpu2/README.md b/examples/vision/faceid/insightface/rknpu2/README.md new file mode 100644 index 00000000000..01bee2e287f --- /dev/null +++ b/examples/vision/faceid/insightface/rknpu2/README.md @@ -0,0 +1,54 @@ +[English](README.md) | 简体中文 +# InsightFace RKNPU准备部署模型 + +本教程提供InsightFace模型在RKNPU2环境下的部署,模型的详细介绍已经ONNX模型的下载请查看[模型介绍文档](../README.md)。 + +## 支持模型列表 +目前FastDeploy支持如下模型的部署 +- ArcFace +- CosFace +- PartialFC +- VPL + +## 下载预训练ONNX模型 + +为了方便开发者的测试,下面提供了InsightFace导出的各系列模型,开发者可直接下载使用。(下表中模型的精度来源于源官方库)其中精度指标来源于InsightFace中对各模型的介绍,详情各参考InsightFace中的说明 + +| 模型 | 大小 | 精度 (AgeDB_30) | +|:-------------------------------------------------------------------------------------------|:------|:--------------| +| [CosFace-r18](https://bj.bcebos.com/paddlehub/fastdeploy/glint360k_cosface_r18.onnx) | 92MB | 97.7 | +| [CosFace-r34](https://bj.bcebos.com/paddlehub/fastdeploy/glint360k_cosface_r34.onnx) | 131MB | 98.3 | +| [CosFace-r50](https://bj.bcebos.com/paddlehub/fastdeploy/glint360k_cosface_r50.onnx) | 167MB | 98.3 | +| [CosFace-r100](https://bj.bcebos.com/paddlehub/fastdeploy/glint360k_cosface_r100.onnx) | 249MB | 98.4 | +| [ArcFace-r18](https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r18.onnx) | 92MB | 97.7 | +| [ArcFace-r34](https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r34.onnx) | 131MB | 98.1 | +| [ArcFace-r50](https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r50.onnx) | 167MB | - | +| [ArcFace-r100](https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r100.onnx) | 249MB | 98.4 | +| [ArcFace-r100_lr0.1](https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_r100_lr01.onnx) | 249MB | 98.4 | +| [PartialFC-r34](https://bj.bcebos.com/paddlehub/fastdeploy/partial_fc_glint360k_r50.onnx) | 167MB | - 
| +| [PartialFC-r50](https://bj.bcebos.com/paddlehub/fastdeploy/partial_fc_glint360k_r100.onnx) | 249MB | - | + + +## 转换为RKNPU模型 + +```bash +wget https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r18.onnx + +python -m paddle2onnx.optimize --input_model ./ms1mv3_arcface_r18/ms1mv3_arcface_r18.onnx \ + --output_model ./ms1mv3_arcface_r18/ms1mv3_arcface_r18.onnx \ + --input_shape_dict "{'data':[1,3,112,112]}" + +python /Path/To/FastDeploy/tools/rknpu2/export.py \ + --config_path tools/rknpu2/config/arcface_unquantized.yaml \ + --target_platform rk3588 +``` + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) + + +## 版本说明 + +- 本版本文档和代码基于[InsightFace CommitID:babb9a5](https://github.com/deepinsight/insightface/commit/babb9a5) 编写 diff --git a/examples/vision/faceid/insightface/rknpu2/cpp/CMakeLists.txt b/examples/vision/faceid/insightface/rknpu2/cpp/CMakeLists.txt new file mode 100644 index 00000000000..ce3b467ba6f --- /dev/null +++ b/examples/vision/faceid/insightface/rknpu2/cpp/CMakeLists.txt @@ -0,0 +1,11 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_arcface_demo ${PROJECT_SOURCE_DIR}/infer_arcface.cc) +target_link_libraries(infer_arcface_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/faceid/insightface/rknpu2/cpp/README.md b/examples/vision/faceid/insightface/rknpu2/cpp/README.md new file mode 100644 index 00000000000..bb88804cdea --- /dev/null +++ b/examples/vision/faceid/insightface/rknpu2/cpp/README.md @@ -0,0 +1,136 @@ +[English](README.md) | 简体中文 +# InsightFace C++部署示例 + +FastDeploy支持在RKNPU上部署包括ArcFace\CosFace\VPL\Partial_FC在内的InsightFace系列模型。 + +本目录下提供`infer_arcface.cc`快速完成InsighFace模型包括ArcFace在CPU/RKNPU加速部署的示例。 + + +在部署前,需确认以下两个步骤: + +1. 软硬件环境满足要求 +2. 根据开发环境,下载预编译部署库或者从头编译FastDeploy仓库 + +以上步骤请参考[RK2代NPU部署库编译](../../../../../../docs/cn/build_and_install/rknpu2.md)实现 + +在本目录执行如下命令即可完成编译测试 + +```bash +mkdir build +cd build +# FastDeploy version need >=1.0.3 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载官方转换好的ArcFace模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r18.onnx +wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/face_demo.zip +unzip face_demo.zip + +# CPU推理 +./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 0 +# RKNPU推理 +./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 1 +``` + +运行完成可视化结果如下图所示 + +
+(人脸识别可视化结果图片)
+ +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## InsightFace C++接口 + +### ArcFace类 + +```c++ +fastdeploy::vision::faceid::ArcFace( + const string& model_file, + const string& params_file = "", + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::ONNX) +``` + +ArcFace模型加载和初始化,其中model_file为导出的ONNX模型格式。 + +### CosFace类 + +```c++ +fastdeploy::vision::faceid::CosFace( + const string& model_file, + const string& params_file = "", + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::ONNX) +``` + +CosFace模型加载和初始化,其中model_file为导出的ONNX模型格式。 + +### PartialFC类 + +```c++ +fastdeploy::vision::faceid::PartialFC( + const string& model_file, + const string& params_file = "", + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::ONNX) +``` + +PartialFC模型加载和初始化,其中model_file为导出的ONNX模型格式。 + +### VPL类 + +```c++ +fastdeploy::vision::faceid::VPL( + const string& model_file, + const string& params_file = "", + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::ONNX) +``` + +VPL模型加载和初始化,其中model_file为导出的ONNX模型格式。 +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径,当模型格式为ONNX时,此参数传入空字符串即可 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为ONNX格式 + +#### Predict函数 + +> ```c++ +> ArcFace::Predict(const cv::Mat& im, FaceRecognitionResult* result) +> ``` +> +> 模型预测接口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **im**: 输入图像,注意需为HWC,BGR格式 +> > * **result**: 检测结果,包括检测框,各个框的置信度, FaceRecognitionResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/) + +### 修改预处理以及后处理的参数 +预处理和后处理的参数的需要通过修改InsightFaceRecognitionPostprocessor,InsightFaceRecognitionPreprocessor的成员变量来进行修改。 + +#### InsightFaceRecognitionPreprocessor成员变量(预处理参数) +> > * **size**(vector<int>): 通过此参数修改预处理过程中resize的大小,包含两个整型元素,表示[width, height], 默认值为[112, 112], + 通过InsightFaceRecognitionPreprocessor::SetSize(std::vector& size)来进行修改 +> > * **alpha**(vector<float>): 预处理归一化的alpha值,计算公式为`x'=x*alpha+beta`,alpha默认为[1. / 127.5, 1.f / 127.5, 1. / 127.5], + 通过InsightFaceRecognitionPreprocessor::SetAlpha(std::vector& alpha)来进行修改 +> > * **beta**(vector<float>): 预处理归一化的beta值,计算公式为`x'=x*alpha+beta`,beta默认为[-1.f, -1.f, -1.f], + 通过InsightFaceRecognitionPreprocessor::SetBeta(std::vector& beta)来进行修改 + +#### InsightFaceRecognitionPostprocessor成员变量(后处理参数) +> > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化,默认false, + InsightFaceRecognitionPostprocessor::SetL2Normalize(bool& l2_normalize)来进行修改 + +- [模型介绍](../../../) +- [Python部署](../python) +- [视觉模型预测结果](../../../../../../docs/api/vision_results/README.md) +- [如何切换模型推理后端引擎](../../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/faceid/insightface/rknpu2/cpp/infer_arcface.cc b/examples/vision/faceid/insightface/rknpu2/cpp/infer_arcface.cc new file mode 100644 index 00000000000..f9a4d85ff0c --- /dev/null +++ b/examples/vision/faceid/insightface/rknpu2/cpp/infer_arcface.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +void CpuInfer(const std::string& model_file, + const std::vector& image_file) { + auto model = fastdeploy::vision::faceid::ArcFace(model_file, ""); + + cv::Mat face0 = cv::imread(image_file[0]); + fastdeploy::vision::FaceRecognitionResult res0; + if (!model.Predict(face0, &res0)) { + std::cerr << "Prediction Failed." << std::endl; + } + + cv::Mat face1 = cv::imread(image_file[1]); + fastdeploy::vision::FaceRecognitionResult res1; + if (!model.Predict(face1, &res1)) { + std::cerr << "Prediction Failed." << std::endl; + } + + cv::Mat face2 = cv::imread(image_file[2]); + fastdeploy::vision::FaceRecognitionResult res2; + if (!model.Predict(face2, &res2)) { + std::cerr << "Prediction Failed." << std::endl; + return; + } + + std::cout << "Prediction Done!" << std::endl; + + std::cout << "--- [Face 0]:" << res0.Str(); + std::cout << "--- [Face 1]:" << res1.Str(); + std::cout << "--- [Face 2]:" << res2.Str(); + + float cosine01 = fastdeploy::vision::utils::CosineSimilarity( + res0.embedding, res1.embedding, + model.GetPostprocessor().GetL2Normalize()); + float cosine02 = fastdeploy::vision::utils::CosineSimilarity( + res0.embedding, res2.embedding, + model.GetPostprocessor().GetL2Normalize()); + std::cout << "Detect Done! Cosine 01: " << cosine01 + << ", Cosine 02:" << cosine02 << std::endl; +} + +void RKNPUInfer(const std::string& model_file, + const std::vector& image_file) { + std::string params_file; + auto option = fastdeploy::RuntimeOption(); + option.UseRKNPU2(); + auto format = fastdeploy::ModelFormat::RKNN; + auto model = fastdeploy::vision::faceid::ArcFace(model_file, params_file, + option, format); + model.GetPreprocessor().DisableNormalize(); + model.GetPreprocessor().DisablePermute(); + + cv::Mat face0 = cv::imread(image_file[0]); + fastdeploy::vision::FaceRecognitionResult res0; + if (!model.Predict(face0, &res0)) { + std::cerr << "Prediction Failed." << std::endl; + return; + } + + cv::Mat face1 = cv::imread(image_file[1]); + fastdeploy::vision::FaceRecognitionResult res1; + if (!model.Predict(face1, &res1)) { + std::cerr << "Prediction Failed." << std::endl; + return; + } + + cv::Mat face2 = cv::imread(image_file[2]); + fastdeploy::vision::FaceRecognitionResult res2; + if (!model.Predict(face2, &res2)) { + std::cerr << "Prediction Failed." << std::endl; + return; + } + + std::cout << "Prediction Done!" << std::endl; + + std::cout << "--- [Face 0]:" << res0.Str(); + std::cout << "--- [Face 1]:" << res1.Str(); + std::cout << "--- [Face 2]:" << res2.Str(); + + float cosine01 = fastdeploy::vision::utils::CosineSimilarity( + res0.embedding, res1.embedding, + model.GetPostprocessor().GetL2Normalize()); + float cosine02 = fastdeploy::vision::utils::CosineSimilarity( + res0.embedding, res2.embedding, + model.GetPostprocessor().GetL2Normalize()); + std::cout << "Detect Done! 
Cosine 01: " << cosine01 + << ", Cosine 02:" << cosine02 << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 6) { + std::cout << "Usage: infer_demo path/to/model path/to/image run_option, " + "e.g ./infer_arcface_demo ms1mv3_arcface_r100.onnx " + "face_0.jpg face_1.jpg face_2.jpg 0" + << std::endl; + std::cout << "The data type of run_option is int, " + "0: run with cpu; 1: run with rknpu2." + << std::endl; + return -1; + } + + std::vector image_files = {argv[2], argv[3], argv[4]}; + if (std::atoi(argv[5]) == 0) { + CpuInfer(argv[1], image_files); + } else if (std::atoi(argv[5]) == 1) { + RKNPUInfer(argv[1], image_files); + } + return 0; +} diff --git a/examples/vision/faceid/insightface/rknpu2/python/README_CN.md b/examples/vision/faceid/insightface/rknpu2/python/README_CN.md new file mode 100644 index 00000000000..fd539f70875 --- /dev/null +++ b/examples/vision/faceid/insightface/rknpu2/python/README_CN.md @@ -0,0 +1,108 @@ +[English](README.md) | 简体中文 +# InsightFace Python部署示例 + +FastDeploy支持在RKNPU上部署包括ArcFace\CosFace\VPL\Partial_FC在内的InsightFace系列模型。 + +本目录下提供`infer_arcface.py`快速完成InsighFace模型包括ArcFace在CPU/RKNPU加速部署的示例。 + + +在部署前,需确认以下步骤: + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/rknpu2.md) + +```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd examples/vision/faceid/insightface/python/ + +#下载ArcFace模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r100.onnx +wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/face_demo.zip +unzip face_demo.zip + +# CPU推理 +python infer_arcface.py --model ms1mv3_arcface_r100.onnx \ + --face face_0.jpg \ + --face_positive face_1.jpg \ + --face_negative face_2.jpg \ + --device cpu +# GPU推理 +python infer_arcface.py --model ms1mv3_arcface_r100.onnx \ + --face face_0.jpg \ + --face_positive face_1.jpg \ + --face_negative face_2.jpg \ + --device gpu +``` + +运行完成可视化结果如下图所示 + +
+<!-- visualization result images -->
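+
+The commands above cover the CPU/GPU paths. For RKNPU2, the ONNX model is first converted to RKNN format (see `tools/rknpu2`) and then loaded with `ModelFormat.RKNN`; the normalize and permute steps are disabled on the FastDeploy side because the conversion config already carries the mean/std values. The sketch below only illustrates that path; the `.rknn` file name is an assumption and depends on your conversion output.
+
+```python
+# Minimal RKNPU2 sketch; the .rknn file name below is assumed.
+import cv2
+import fastdeploy as fd
+
+option = fd.RuntimeOption()
+option.use_rknpu2()
+
+# Load the converted RKNN model instead of the ONNX one.
+model = fd.vision.faceid.ArcFace(
+    "ms1mv3_arcface_r100.rknn",
+    runtime_option=option,
+    model_format=fd.ModelFormat.RKNN)
+
+# Normalization/permute are folded into the RKNN model, so skip them here.
+model.preprocessor.disable_normalize()
+model.preprocessor.disable_permute()
+
+result = model.predict(cv2.imread("face_0.jpg"))
+print(result)
+```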
+ +```bash +Prediction Done! +--- [Face 0]:FaceRecognitionResult: [Dim(512), Min(-2.309220), Max(2.372197), Mean(0.016987)] +--- [Face 1]:FaceRecognitionResult: [Dim(512), Min(-2.288258), Max(1.995104), Mean(-0.003400)] +--- [Face 2]:FaceRecognitionResult: [Dim(512), Min(-3.243411), Max(3.875866), Mean(-0.030682)] +Detect Done! Cosine 01: 0.814385, Cosine 02:-0.059388 + +``` + +## InsightFace Python接口 + +```python +fastdeploy.vision.faceid.ArcFace(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX) +fastdeploy.vision.faceid.CosFace(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX) +fastdeploy.vision.faceid.PartialFC(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX) +fastdeploy.vision.faceid.VPL(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX) +``` + +ArcFace模型加载和初始化,其中model_file为导出的ONNX模型格式 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径,当模型格式为ONNX格式时,此参数无需设定 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为ONNX + +### predict函数 + +> ```python +> ArcFace.predict(image_data) +> ``` +> +> 模型预测结口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 + +> **返回** +> +> > 返回`fastdeploy.vision.FaceRecognitionResult`结构体,结构体说明参考文档[视觉模型预测结果](../../../../../docs/api/vision_results/) + +### 类成员属性 +#### 预处理参数 +用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 + +#### AdaFacePreprocessor的成员变量 +以下变量为AdaFacePreprocessor的成员变量 +> > * **size**(list[int]): 通过此参数修改预处理过程中resize的大小,包含两个整型元素,表示[width, height], 默认值为[112, 112] +> > * **alpha**(list[float]): 预处理归一化的alpha值,计算公式为`x'=x*alpha+beta`,alpha默认为[1. / 127.5, 1.f / 127.5, 1. / 127.5] +> > * **beta**(list[float]): 预处理归一化的beta值,计算公式为`x'=x*alpha+beta`,beta默认为[-1.f, -1.f, -1.f] + +#### AdaFacePostprocessor的成员变量 +以下变量为AdaFacePostprocessor的成员变量 +> > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化,默认False + + +## 其它文档 + +- [InsightFace 模型介绍](..) 
+- [InsightFace C++部署](../cpp) +- [模型预测结果说明](../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/faceid/insightface/rknpu2/python/infer_arcface.py b/examples/vision/faceid/insightface/rknpu2/python/infer_arcface.py new file mode 100644 index 00000000000..90222a27273 --- /dev/null +++ b/examples/vision/faceid/insightface/rknpu2/python/infer_arcface.py @@ -0,0 +1,76 @@ +import fastdeploy as fd +import cv2 +import numpy as np + + +def cosine_similarity(a, b): + a = np.array(a) + b = np.array(b) + mul_a = np.linalg.norm(a, ord=2) + mul_b = np.linalg.norm(b, ord=2) + mul_ab = np.dot(a, b) + return mul_ab / (mul_a * mul_b) + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="Path of insgihtface onnx model.") + parser.add_argument( + "--face", required=True, help="Path of test face image file.") + parser.add_argument( + "--face_positive", + required=True, + help="Path of test face_positive image file.") + parser.add_argument( + "--face_negative", + required=True, + help="Path of test face_negative image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "npu": + option.use_rknpu2() + return option + + +args = parse_arguments() + +runtime_option = fd.RuntimeOption() +model = fd.vision.faceid.ArcFace(args.model, runtime_option=runtime_option) +if args.device.lower() == "npu": + runtime_option.use_rknpu2() + model.preprocessor.disable_normalize() + model.preprocessor.disable_permute() + +face0 = cv2.imread(args.face) +face1 = cv2.imread(args.face_positive) +face2 = cv2.imread(args.face_negative) + +result0 = model.predict(face0) +result1 = model.predict(face1) +result2 = model.predict(face2) + +embedding0 = result0.embedding +embedding1 = result1.embedding +embedding2 = result2.embedding + +cosine01 = cosine_similarity(embedding0, embedding1) +cosine02 = cosine_similarity(embedding0, embedding2) + +print(result0, end="") +print(result1, end="") +print(result2, end="") +print("Cosine 01: ", cosine01) +print("Cosine 02: ", cosine02) +print(model.runtime_option) diff --git a/fastdeploy/vision/detection/ppdet/model.h b/fastdeploy/vision/detection/ppdet/model.h index a3797bdb8b4..1a33c477138 100755 --- a/fastdeploy/vision/detection/ppdet/model.h +++ b/fastdeploy/vision/detection/ppdet/model.h @@ -253,7 +253,7 @@ class FASTDEPLOY_DECL PaddleYOLOv8 : public PPDetBase { const ModelFormat& model_format = ModelFormat::PADDLE) : PPDetBase(model_file, params_file, config_file, custom_option, model_format) { - valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER}; + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER, Backend::LITE}; valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; valid_kunlunxin_backends = {Backend::LITE}; initialized = Initialize(); diff --git a/fastdeploy/vision/faceid/contrib/insightface/base.cc b/fastdeploy/vision/faceid/contrib/insightface/base.cc old mode 100755 new mode 100644 index 35d8b808609..8b970cb82c0 --- a/fastdeploy/vision/faceid/contrib/insightface/base.cc +++ b/fastdeploy/vision/faceid/contrib/insightface/base.cc @@ -22,7 +22,6 @@ InsightFaceRecognitionBase::InsightFaceRecognitionBase( const std::string& model_file, const std::string& 
params_file, const fastdeploy::RuntimeOption& custom_option, const fastdeploy::ModelFormat& model_format) { - if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::ORT}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; @@ -31,6 +30,7 @@ InsightFaceRecognitionBase::InsightFaceRecognitionBase( valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_kunlunxin_backends = {Backend::LITE}; } + valid_rknpu_backends = {Backend::RKNPU2}; runtime_option = custom_option; runtime_option.model_format = model_format; runtime_option.model_file = model_file; @@ -55,8 +55,9 @@ bool InsightFaceRecognitionBase::Predict(const cv::Mat& im, return true; } -bool InsightFaceRecognitionBase::BatchPredict(const std::vector& images, - std::vector* results){ +bool InsightFaceRecognitionBase::BatchPredict( + const std::vector& images, + std::vector* results) { std::vector fd_images = WrapMat(images); FDASSERT(images.size() == 1, "Only support batch = 1 now."); if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { @@ -70,8 +71,9 @@ bool InsightFaceRecognitionBase::BatchPredict(const std::vector& images return false; } - if (!postprocessor_.Run(reused_output_tensors_, results)){ - FDERROR << "Failed to postprocess the inference results by runtime." << std::endl; + if (!postprocessor_.Run(reused_output_tensors_, results)) { + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; return false; } return true; diff --git a/fastdeploy/vision/faceid/contrib/insightface/insightface_pybind.cc b/fastdeploy/vision/faceid/contrib/insightface/insightface_pybind.cc index b193d9fb795..fa0e2babf67 100644 --- a/fastdeploy/vision/faceid/contrib/insightface/insightface_pybind.cc +++ b/fastdeploy/vision/faceid/contrib/insightface/insightface_pybind.cc @@ -19,83 +19,120 @@ void BindInsightFace(pybind11::module& m) { pybind11::class_( m, "InsightFaceRecognitionPreprocessor") .def(pybind11::init()) - .def("run", [](vision::faceid::InsightFaceRecognitionPreprocessor& self, - std::vector& im_list) { - std::vector images; - for (size_t i = 0; i < im_list.size(); ++i) { - images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); - } - std::vector outputs; - if (!self.Run(&images, &outputs)) { - throw std::runtime_error("Failed to preprocess the input data in InsightFaceRecognitionPreprocessor."); - } - for (size_t i = 0; i < outputs.size(); ++i) { - outputs[i].StopSharing(); - } - return outputs; - }) - .def_property("permute", &vision::faceid::InsightFaceRecognitionPreprocessor::GetPermute, - &vision::faceid::InsightFaceRecognitionPreprocessor::SetPermute) - .def_property("alpha", &vision::faceid::InsightFaceRecognitionPreprocessor::GetAlpha, - &vision::faceid::InsightFaceRecognitionPreprocessor::SetAlpha) - .def_property("beta", &vision::faceid::InsightFaceRecognitionPreprocessor::GetBeta, - &vision::faceid::InsightFaceRecognitionPreprocessor::SetBeta) - .def_property("size", &vision::faceid::InsightFaceRecognitionPreprocessor::GetSize, - &vision::faceid::InsightFaceRecognitionPreprocessor::SetSize); + .def("run", + [](vision::faceid::InsightFaceRecognitionPreprocessor& self, + std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "InsightFaceRecognitionPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + 
outputs[i].StopSharing(); + } + return outputs; + }) + .def( + "disable_normalize", + &vision::faceid::InsightFaceRecognitionPreprocessor::DisableNormalize) + .def("disable_permute", + &vision::faceid::InsightFaceRecognitionPreprocessor::DisablePermute) + .def_property( + "alpha", + &vision::faceid::InsightFaceRecognitionPreprocessor::GetAlpha, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetAlpha) + .def_property( + "beta", &vision::faceid::InsightFaceRecognitionPreprocessor::GetBeta, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetBeta) + .def_property( + "size", &vision::faceid::InsightFaceRecognitionPreprocessor::GetSize, + &vision::faceid::InsightFaceRecognitionPreprocessor::SetSize); pybind11::class_( m, "InsightFaceRecognitionPostprocessor") .def(pybind11::init()) - .def("run", [](vision::faceid::InsightFaceRecognitionPostprocessor& self, std::vector& inputs) { - std::vector results; - if (!self.Run(inputs, &results)) { - throw std::runtime_error("Failed to postprocess the runtime result in InsightFaceRecognitionPostprocessor."); - } - return results; - }) - .def("run", [](vision::faceid::InsightFaceRecognitionPostprocessor& self, std::vector& input_array) { - std::vector results; - std::vector inputs; - PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); - if (!self.Run(inputs, &results)) { - throw std::runtime_error("Failed to postprocess the runtime result in InsightFaceRecognitionPostprocessor."); - } - return results; - }) - .def_property("l2_normalize", &vision::faceid::InsightFaceRecognitionPostprocessor::GetL2Normalize, - &vision::faceid::InsightFaceRecognitionPostprocessor::SetL2Normalize); + .def("run", + [](vision::faceid::InsightFaceRecognitionPostprocessor& self, + std::vector& inputs) { + std::vector results; + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "InsightFaceRecognitionPostprocessor."); + } + return results; + }) + .def("run", + [](vision::faceid::InsightFaceRecognitionPostprocessor& self, + std::vector& input_array) { + std::vector results; + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + if (!self.Run(inputs, &results)) { + throw std::runtime_error( + "Failed to postprocess the runtime result in " + "InsightFaceRecognitionPostprocessor."); + } + return results; + }) + .def_property( + "l2_normalize", + &vision::faceid::InsightFaceRecognitionPostprocessor::GetL2Normalize, + &vision::faceid::InsightFaceRecognitionPostprocessor::SetL2Normalize); pybind11::class_( m, "InsightFaceRecognitionBase") - .def(pybind11::init()) - .def("predict", [](vision::faceid::InsightFaceRecognitionBase& self, pybind11::array& data) { - cv::Mat im = PyArrayToCvMat(data); - vision::FaceRecognitionResult result; - self.Predict(im, &result); - return result; - }) - .def("batch_predict", [](vision::faceid::InsightFaceRecognitionBase& self, std::vector& data) { - std::vector images; - for (size_t i = 0; i < data.size(); ++i) { - images.push_back(PyArrayToCvMat(data[i])); - } - std::vector results; - self.BatchPredict(images, &results); - return results; - }) - .def_property_readonly("preprocessor", &vision::faceid::InsightFaceRecognitionBase::GetPreprocessor) - .def_property_readonly("postprocessor", &vision::faceid::InsightFaceRecognitionBase::GetPostprocessor); + .def(pybind11::init()) + .def("predict", + [](vision::faceid::InsightFaceRecognitionBase& self, + pybind11::array& data) { + cv::Mat im = PyArrayToCvMat(data); + 
vision::FaceRecognitionResult result; + self.Predict(im, &result); + return result; + }) + .def("batch_predict", + [](vision::faceid::InsightFaceRecognitionBase& self, + std::vector& data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }) + .def_property_readonly( + "preprocessor", + &vision::faceid::InsightFaceRecognitionBase::GetPreprocessor) + .def_property_readonly( + "postprocessor", + &vision::faceid::InsightFaceRecognitionBase::GetPostprocessor); - pybind11::class_(m, "ArcFace") - .def(pybind11::init()); + pybind11::class_(m, "ArcFace") + .def(pybind11::init()); - pybind11::class_(m, "CosFace") - .def(pybind11::init()); + pybind11::class_(m, "CosFace") + .def(pybind11::init()); - pybind11::class_(m, "PartialFC") - .def(pybind11::init()); + pybind11::class_(m, "PartialFC") + .def(pybind11::init()); - pybind11::class_(m, "VPL") - .def(pybind11::init()); + pybind11::class_(m, "VPL") + .def(pybind11::init()); } } // namespace fastdeploy diff --git a/fastdeploy/vision/faceid/contrib/insightface/model.h b/fastdeploy/vision/faceid/contrib/insightface/model.h index a1a8f128bd6..8ae5c950a9e 100755 --- a/fastdeploy/vision/faceid/contrib/insightface/model.h +++ b/fastdeploy/vision/faceid/contrib/insightface/model.h @@ -35,6 +35,8 @@ class FASTDEPLOY_DECL ArcFace : public InsightFaceRecognitionBase { if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::ORT}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -63,6 +65,8 @@ class FASTDEPLOY_DECL CosFace : public InsightFaceRecognitionBase { if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::ORT}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -83,13 +87,15 @@ class FASTDEPLOY_DECL PartialFC : public InsightFaceRecognitionBase { * \param[in] model_format Model format of the loaded model, default is Paddle format */ PartialFC(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::ONNX) + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::ONNX) : InsightFaceRecognitionBase(model_file, params_file, custom_option, model_format) { if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::ORT}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -110,13 +116,15 @@ class FASTDEPLOY_DECL VPL : public InsightFaceRecognitionBase { * \param[in] model_format Model format of the loaded model, default is Paddle format */ VPL(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const ModelFormat& 
model_format = ModelFormat::ONNX) + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::ONNX) : InsightFaceRecognitionBase(model_file, params_file, custom_option, model_format) { if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::ORT}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else if (model_format == ModelFormat::RKNN) { + valid_rknpu_backends = {Backend::RKNPU2}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; diff --git a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc old mode 100755 new mode 100644 index c846522cc1f..398a7016e0b --- a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc +++ b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc @@ -23,11 +23,10 @@ InsightFaceRecognitionPreprocessor::InsightFaceRecognitionPreprocessor() { size_ = {112, 112}; alpha_ = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; beta_ = {-1.f, -1.f, -1.f}; // RGB - permute_ = true; } - -bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat * mat, FDTensor* output) { +bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat* mat, + FDTensor* output) { // face recognition model's preprocess steps in insightface // reference: insightface/recognition/arcface_torch/inference.py // 1. Resize @@ -39,13 +38,16 @@ bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat * mat, FDTensor* outpu if (resize_h != mat->Height() || resize_w != mat->Width()) { Resize::Run(mat, resize_w, resize_h); } - if (permute_) { + + if (!disable_permute_) { BGR2RGB::Run(mat); } - Convert::Run(mat, alpha_, beta_); - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); + if (!disable_normalize_) { + Convert::Run(mat, alpha_, beta_); + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + } mat->ShareWithTensor(output); output->ExpandDim(0); // reshape to n, h, w, c @@ -55,7 +57,8 @@ bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat * mat, FDTensor* outpu bool InsightFaceRecognitionPreprocessor::Run(std::vector* images, std::vector* outputs) { if (images->empty()) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } FDASSERT(images->size() == 1, "Only support batch = 1 now."); diff --git a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.h b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.h index 88d0dce8dca..b73538df49e 100755 --- a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.h +++ b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.h @@ -54,10 +54,11 @@ class FASTDEPLOY_DECL InsightFaceRecognitionPreprocessor { /// Set beta. void SetBeta(std::vector& beta) { beta_ = beta; } - bool GetPermute() { return permute_; } + /// This function will disable normalize and hwc2chw in preprocessing step. + void DisableNormalize() { disable_normalize_ = true; } - /// Set permute. - void SetPermute(bool permute) { permute_ = permute; } + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute() { disable_permute_ = true; } protected: bool Preprocess(FDMat* mat, FDTensor* output); @@ -70,9 +71,11 @@ class FASTDEPLOY_DECL InsightFaceRecognitionPreprocessor { // Argument for image preprocessing step, beta values for normalization, // default beta = {-1.f, -1.f, -1.f} std::vector beta_; + // for recording the switch of normalize + bool disable_normalize_ = false; // Argument for image preprocessing step, whether to swap the B and R channel, // such as BGR->RGB, default true. - bool permute_; + bool disable_permute_ = false; }; } // namespace faceid diff --git a/fastdeploy/vision/utils/face_align.cc b/fastdeploy/vision/utils/face_align.cc new file mode 100644 index 00000000000..63dcc43972d --- /dev/null +++ b/fastdeploy/vision/utils/face_align.cc @@ -0,0 +1,151 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// reference: +// https://github.com/deepinsight/insightface/blob/master/recognition/_tools_/cpp_align/face_align.h +#include "fastdeploy/vision/utils/utils.h" + +namespace fastdeploy { +namespace vision { +namespace utils { + +cv::Mat MeanAxis0(const cv::Mat& src) { + int num = src.rows; + int dim = src.cols; + cv::Mat output(1, dim, CV_32F); + for (int i = 0; i < dim; i++) { + float sum = 0; + for (int j = 0; j < num; j++) { + sum += src.at(j, i); + } + output.at(0, i) = sum / num; + } + return output; +} + +cv::Mat ElementwiseMinus(const cv::Mat& A, const cv::Mat& B) { + cv::Mat output(A.rows, A.cols, A.type()); + assert(B.cols == A.cols); + if (B.cols == A.cols) { + for (int i = 0; i < A.rows; i++) { + for (int j = 0; j < B.cols; j++) { + output.at(i, j) = A.at(i, j) - B.at(0, j); + } + } + } + return output; +} + +cv::Mat VarAxis0(const cv::Mat& src) { + cv::Mat temp_ = ElementwiseMinus(src, MeanAxis0(src)); + cv::multiply(temp_, temp_, temp_); + return MeanAxis0(temp_); +} + +int MatrixRank(cv::Mat M) { + cv::Mat w, u, vt; + cv::SVD::compute(M, w, u, vt); + cv::Mat1b non_zero_singular_values = w > 0.0001; + int rank = countNonZero(non_zero_singular_values); + return rank; +} + +cv::Mat SimilarTransform(cv::Mat& dst, cv::Mat& src) { + int num = dst.rows; + int dim = dst.cols; + cv::Mat src_mean = MeanAxis0(dst); + cv::Mat dst_mean = MeanAxis0(src); + cv::Mat src_demean = ElementwiseMinus(dst, src_mean); + cv::Mat dst_demean = ElementwiseMinus(src, dst_mean); + cv::Mat A = (dst_demean.t() * src_demean) / static_cast(num); + cv::Mat d(dim, 1, CV_32F); + d.setTo(1.0f); + if (cv::determinant(A) < 0) { + d.at(dim - 1, 0) = -1; + } + cv::Mat T = cv::Mat::eye(dim + 1, dim + 1, CV_32F); + cv::Mat U, S, V; + cv::SVD::compute(A, S, U, V); + int rank = MatrixRank(A); + if (rank == 0) { + assert(rank == 0); + } else if (rank == dim - 1) { + if (cv::determinant(U) * cv::determinant(V) > 0) { + T.rowRange(0, dim).colRange(0, dim) = U * V; + } else { + int s = d.at(dim - 1, 0) = -1; + d.at(dim - 1, 0) = -1; + + T.rowRange(0, dim).colRange(0, dim) = U * V; + cv::Mat diag_ 
= cv::Mat::diag(d); + cv::Mat twp = diag_ * V; // np.dot(np.diag(d), V.T) + cv::Mat B = cv::Mat::zeros(3, 3, CV_8UC1); + cv::Mat C = B.diag(0); + T.rowRange(0, dim).colRange(0, dim) = U * twp; + d.at(dim - 1, 0) = s; + } + } else { + cv::Mat diag_ = cv::Mat::diag(d); + cv::Mat twp = diag_ * V.t(); // np.dot(np.diag(d), V.T) + cv::Mat res = U * twp; // U + T.rowRange(0, dim).colRange(0, dim) = -U.t() * twp; + } + cv::Mat var_ = VarAxis0(src_demean); + float val = cv::sum(var_).val[0]; + cv::Mat res; + cv::multiply(d, S, res); + float scale = 1.0 / val * cv::sum(res).val[0]; + T.rowRange(0, dim).colRange(0, dim) = + -T.rowRange(0, dim).colRange(0, dim).t(); + cv::Mat temp1 = T.rowRange(0, dim).colRange(0, dim); // T[:dim, :dim] + cv::Mat temp2 = src_mean.t(); // src_mean.T + cv::Mat temp3 = temp1 * temp2; // np.dot(T[:dim, :dim], src_mean.T) + cv::Mat temp4 = scale * temp3; + T.rowRange(0, dim).colRange(dim, dim + 1) = -(temp4 - dst_mean.t()); + T.rowRange(0, dim).colRange(0, dim) *= scale; + return T; +} + +std::vector AlignFaceWithFivePoints( + cv::Mat& image, FaceDetectionResult& result, + std::vector> std_landmarks, + std::array output_size) { + FDASSERT(std_landmarks.size() == 5, "The landmarks.size() must be 5.") + FDASSERT(!image.empty(), "The input_image can't be empty.") + std::vector output_images(result.boxes.size()); + if (result.boxes.empty()) { + FDWARNING << "The result is empty." << std::endl; + return output_images; + } + + cv::Mat src(5, 2, CV_32FC1, std_landmarks.data()); + for (int i = 0; i < result.landmarks.size(); i += 5) { + cv::Mat dst(5, 2, CV_32FC1, result.landmarks.data() + i); + cv::Mat m = SimilarTransform(dst, src); + cv::Mat map_matrix; + cv::Rect map_matrix_r = cv::Rect(0, 0, 3, 2); + cv::Mat(m, map_matrix_r).copyTo(map_matrix); + cv::Mat cropped_image_aligned; + cv::warpAffine(image, cropped_image_aligned, map_matrix, + {output_size[0], output_size[1]}); + if (cropped_image_aligned.empty()) { + FDWARNING << "croppedImageAligned is empty." << std::endl; + } + output_images.push_back(cropped_image_aligned); + } + return output_images; +} +} // namespace utils +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/utils/utils.h b/fastdeploy/vision/utils/utils.h index c36d8d0369a..9f5106c4adb 100644 --- a/fastdeploy/vision/utils/utils.h +++ b/fastdeploy/vision/utils/utils.h @@ -70,16 +70,32 @@ void SortDetectionResult(DetectionResult* output); void SortDetectionResult(FaceDetectionResult* result); // L2 Norm / cosine similarity (for face recognition, ...) -FASTDEPLOY_DECL std::vector L2Normalize( - const std::vector& values); +FASTDEPLOY_DECL std::vector +L2Normalize(const std::vector& values); FASTDEPLOY_DECL float CosineSimilarity(const std::vector& a, const std::vector& b, bool normalized = true); -bool CropImageByBox(Mat& src_im, Mat* dst_im, - const std::vector& box, std::vector* center, - std::vector* scale, const float expandratio = 0.3); +/** \brief Do face align for model with five points. 
+ * + * \param[in] image The original image + * \param[in] result FaceDetectionResult + * \param[in] std_landmarks Standard face template + * \param[in] output_size The size of output mat + */ +FASTDEPLOY_DECL std::vector AlignFaceWithFivePoints( + cv::Mat& image, FaceDetectionResult& result, + std::vector> std_landmarks = {{38.2946f, 51.6963f}, + {73.5318f, 51.5014f}, + {56.0252f, 71.7366f}, + {41.5493f, 92.3655f}, + {70.7299f, 92.2041f}}, + std::array output_size = {112, 112}); + +bool CropImageByBox(Mat& src_im, Mat* dst_im, const std::vector& box, + std::vector* center, std::vector* scale, + const float expandratio = 0.3); /** * Function: for keypoint detection model, fine positioning of keypoints in diff --git a/python/fastdeploy/vision/faceid/contrib/insightface/__init__.py b/python/fastdeploy/vision/faceid/contrib/insightface/__init__.py index 3353c8e4408..dd8cab5e20d 100644 --- a/python/fastdeploy/vision/faceid/contrib/insightface/__init__.py +++ b/python/fastdeploy/vision/faceid/contrib/insightface/__init__.py @@ -56,13 +56,17 @@ def beta(self): """ return self._preprocessor.beta - @property - def permute(self): + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): """ - Argument for image preprocessing step, whether to swap the B and R channel, - such as BGR->RGB, default true. + This function will disable hwc2chw in preprocessing step. """ - return self._preprocessor.permute + self._preprocessor.disable_permute() class InsightFaceRecognitionPostprocessor: diff --git a/tools/rknpu2/config/arcface_quantized.yaml b/tools/rknpu2/config/arcface_quantized.yaml new file mode 100644 index 00000000000..95642b5c9c4 --- /dev/null +++ b/tools/rknpu2/config/arcface_quantized.yaml @@ -0,0 +1,15 @@ +mean: + - + - 127.5 + - 127.5 + - 127.5 +std: + - + - 127.5 + - 127.5 + - 127.5 +model_path: ./ms1mv3_arcface_r18/ms1mv3_arcface_r18.onnx +outputs_nodes: +do_quantization: True +dataset: "./ms1mv3_arcface_r18/datasets.txt" +output_folder: "./ms1mv3_arcface_r18" diff --git a/tools/rknpu2/config/arcface_unquantized.yaml b/tools/rknpu2/config/arcface_unquantized.yaml new file mode 100644 index 00000000000..c11b285d362 --- /dev/null +++ b/tools/rknpu2/config/arcface_unquantized.yaml @@ -0,0 +1,15 @@ +mean: + - + - 127.5 + - 127.5 + - 127.5 +std: + - + - 127.5 + - 127.5 + - 127.5 +model_path: ./ms1mv3_arcface_r18/ms1mv3_arcface_r18.onnx +outputs_nodes: +do_quantization: False +dataset: "./ms1mv3_arcface_r18/datasets.txt" +output_folder: "./ms1mv3_arcface_r18"
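
The two yaml files above are intended for the ArcFace ONNX-to-RKNN conversion (with and without quantization) via the tooling under `tools/rknpu2`. As a rough illustration of what those fields configure, the sketch below maps them onto rknn-toolkit2 calls; the actual conversion is performed by the scripts in `tools/rknpu2`, and the target platform shown is an assumption, since the yaml does not specify one.

```python
# Illustration only: how the arcface_quantized.yaml fields map onto
# rknn-toolkit2 calls. The target platform here is an assumption.
from rknn.api import RKNN

rknn = RKNN()
rknn.config(
    mean_values=[[127.5, 127.5, 127.5]],   # yaml: mean
    std_values=[[127.5, 127.5, 127.5]],    # yaml: std
    target_platform="rk3588")              # assumed, not part of the yaml
rknn.load_onnx(model="./ms1mv3_arcface_r18/ms1mv3_arcface_r18.onnx")  # yaml: model_path
rknn.build(do_quantization=True,           # yaml: do_quantization
           dataset="./ms1mv3_arcface_r18/datasets.txt")               # yaml: dataset
rknn.export_rknn("./ms1mv3_arcface_r18/ms1mv3_arcface_r18.rknn")      # written under yaml: output_folder
rknn.release()
```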