Merged
7 changes: 5 additions & 2 deletions benchmark/benchmark_ppdet.py
@@ -17,7 +17,7 @@
import os
import numpy as np
import time

from tqdm import tqdm

def parse_arguments():
import argparse
@@ -263,6 +263,9 @@ def cpu_stat_func(self, q, pid, interval=0.0):
elif "yolov3" in args.model:
model = fd.vision.detection.YOLOv3(
model_file, params_file, config_file, runtime_option=option)
elif "yolov8" in args.model:
model = fd.vision.detection.PaddleYOLOv8(
model_file, params_file, config_file, runtime_option=option)
elif "ppyolo_r50vd_dcn_1x_coco" in args.model or "ppyolov2_r101vd_dcn_365e_coco" in args.model:
model = fd.vision.detection.PPYOLO(
model_file, params_file, config_file, runtime_option=option)
@@ -284,7 +287,7 @@ def cpu_stat_func(self, q, pid, interval=0.0):

model.enable_record_time_of_runtime()
im_ori = cv2.imread(args.image)
for i in range(args.iter_num):
for i in tqdm(range(args.iter_num)):
im = im_ori
start = time.time()
result = model.predict(im)
30 changes: 19 additions & 11 deletions docs/cn/faq/rknpu2/rknpu2.md
@@ -13,14 +13,22 @@ ONNX models cannot directly run on the NPU in RK chips; the ONNX model needs to be converted
* ARM CPU results are measured with the ONNX framework
* All NPU results are measured with a single core

| Task | Model | Model version (tested) | ARM CPU/RKNN speed (ms) |
|----------------|------------------------------------------------------------------------------------------|--------------------------|--------------------|
| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 |
| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 |
| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- |
| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- |
| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- |
| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 |
| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 |
| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 |
| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 |
| Task | Model | Model version (tested) | ARM CPU/RKNN speed (ms) |
|----------------------|------------------------------------------------------------------------------------------|--------------------------|--------------------|
| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 |
| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 |
| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- |
| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- |
| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- |
| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 |
| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 |
| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 |
| Face Recognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 81/12 |
| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 |

## Prebuilt Library Download

To make development easier, FastDeploy 1.0.2 prebuilt packages are provided for download:

- [FastDeploy RK356X c++ SDK](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-rk356X-1.0.2.tgz)
- [FastDeploy RK3588 c++ SDK](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-rk3588-1.0.2.tgz)
41 changes: 41 additions & 0 deletions examples/audio/silero-vad/README.md
@@ -0,0 +1,41 @@
English | [简体中文](README_CN.md)

# Silero VAD - pre-trained enterprise-grade Voice Activity Detector

The deployment model comes from [silero-vad](https://github.com/snakers4/silero-vad)

![](https://user-images.githubusercontent.com/36505480/198026365-8da383e0-5398-4a12-b7f8-22c2c0059512.png)

## Key Features

* Stellar accuracy

Silero VAD has excellent results on speech detection tasks.

* Fast

One audio chunk (30+ ms) takes less than 1ms to be processed on a single CPU thread. Using batching or GPU can also improve performance considerably.

* General

Silero VAD was trained on a huge corpus covering more than 100 languages, and it performs well on audio from different domains with varying background noise and quality levels.

* Flexible sampling rate

Silero VAD supports 8000 Hz and 16000 Hz sampling rates.

## Download Pre-trained ONNX Model

For developers' testing, the exported VAD model is provided below and can be downloaded directly.

| Model | Size | Notes |
| :----------------------------------------------------------- | :---- | :----------------------------------------------------------- |
| [silero-vad](https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz) | 1.8MB | This model file is sourced from [snakers4/silero-vad](https://github.com/snakers4/silero-vad), MIT License |

## Detailed Deployment Documents

- [C++ deployment](cpp)

## Source

[https://github.com/snakers4/silero-vad](https://github.com/snakers4/silero-vad)
40 changes: 40 additions & 0 deletions examples/audio/silero-vad/README_CN.md
@@ -0,0 +1,40 @@
简体中文 | [English](README.md)

# Silero VAD: Pre-trained Enterprise-grade Voice Activity Detector

The deployment model comes from [silero-vad](https://github.com/snakers4/silero-vad)

![](https://user-images.githubusercontent.com/36505480/198026365-8da383e0-5398-4a12-b7f8-22c2c0059512.png)

## Key Features

* High accuracy

Silero VAD delivers excellent results on speech detection tasks.

* Fast inference

One audio chunk (30+ ms) is processed in under 1 ms on a single CPU thread.

* Generality

Silero VAD was trained on a huge corpus covering more than 100 languages, and it performs well on audio from different domains with varying background noise and quality levels.

* Flexible sampling rates

Silero VAD supports 8000 Hz and 16000 Hz sampling rates.

## Download the Pre-trained ONNX Model

For developers' testing, the exported VAD model is provided below and can be downloaded directly.

| Model | Size | Notes |
| :----------------------------------------------------------- | :---- | :----------------------------------------------------------- |
| [silero-vad](https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz) | 1.8MB | This model file is sourced from [snakers4/silero-vad](https://github.com/snakers4/silero-vad), MIT License |

## Detailed Deployment Documents

- [C++ deployment](cpp)

## Model Source

[https://github.com/snakers4/silero-vad](https://github.com/snakers4/silero-vad)
17 changes: 17 additions & 0 deletions examples/audio/silero-vad/cpp/CMakeLists.txt
@@ -0,0 +1,17 @@
cmake_minimum_required(VERSION 3.23)
project(silero_vad)

set(CMAKE_CXX_STANDARD 11)

# Specify the path of the downloaded, extracted FastDeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")

include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

# Add FastDeploy dependency headers
include_directories(${FASTDEPLOY_INCS})

add_executable(infer_onnx_silero_vad ${PROJECT_SOURCE_DIR}/infer_onnx_silero_vad.cc wav.h vad.cc vad.h)

# Link against the FastDeploy libraries
target_link_libraries(infer_onnx_silero_vad ${FASTDEPLOY_LIBS})
121 changes: 121 additions & 0 deletions examples/audio/silero-vad/cpp/README.md
@@ -0,0 +1,121 @@
English | [简体中文](README_CN.md)

# Silero VAD Deployment Example

This directory provides an example in which `infer_onnx_silero_vad` quickly finishes deploying the VAD model on CPU/GPU.

Before deployment, confirm the following two steps.

- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).

Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following commands in this directory.

```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Choose the appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j

# Download the VAD model file and test audio. After decompression, place them in the same directory as infer_onnx_silero_vad.cc
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav

# inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```

- The above commands work on Linux and macOS. For Windows, refer to:
  - [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md)

## VAD C++ Interface

### Vad Class

```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```

**Parameter**

> * **model_file**(str): Model file path
> * **custom_option**(RuntimeOption): Backend inference configuration; the default configuration is used when it is not provided

### setAudioCofig function

**Must be called before the `init` function**

```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```

**Parameter**

> * **sr**(int): Sampling rate of the input audio
> * **frame_ms**(int): Length of each detection frame in milliseconds; used to calculate the detection window size
> * **threshold**(float): Probability threshold above which a frame is judged to be speech
> * **min_silence_duration_ms**(int): Minimum duration used to decide whether a pause counts as silence
> * **speech_pad_ms**(int): Padding used to calculate the end time of a speech segment
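As a sketch of how `sr` and `frame_ms` relate to the detection window, the window size in samples follows from the two values. This is an illustration only; the exact formula inside FastDeploy's `Vad` is an assumption, and the helper name is hypothetical:

```cpp
// Hypothetical helper: detection window size in samples, derived from
// the sampling rate and the frame length in milliseconds.
// The real computation inside Vad may differ.
int WindowSizeSamples(int sr, int frame_ms) {
  return sr / 1000 * frame_ms;
}
```

For example, at 16000 Hz a 64 ms frame corresponds to a 1024-sample window.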

### init function

Used to initialize audio-related parameters.

```c++
void Vad::init();
```

### loadAudio function

Load audio.

```c++
void Vad::loadAudio(const std::string& wavPath)
```

**Parameter**

> * **wavPath**(str): Audio file path

### Predict function

Used to run model inference.

```c++
bool Vad::Predict();
```

### getResult function

**Used to obtain inference results**

```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```

**Parameter**

> * **removeThreshold**(float): Threshold for discarding short result segments; recognized segments shorter than this are dropped
> * **expandHeadThreshold**(float): Offset applied to the start of each segment; the detected start time may be too close to the speech, so it is moved earlier by this amount
> * **expandTailThreshold**(float): Offset applied to the end of each segment; the detected end time may be too close to the speech, so it is moved later by this amount
> * **mergeThreshold**(float): Segments separated by a gap smaller than this threshold are merged into one

**The output format is** `std::vector<std::map<std::string, float>>`

> The output is a list in which each element is one speech segment
>
> For each segment, the key `'start'` gives the start time and `'end'` gives the end time

### Tips

1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must match the rate set in the code
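Tip 2 can be checked programmatically before calling `loadAudio`. A minimal sketch, independent of FastDeploy, that reads the sample-rate field from a canonical 44-byte RIFF/WAVE header so it can be compared with the value passed to `setAudioCofig` (the helper name is illustrative):

```cpp
#include <cstdint>
#include <vector>

// In a canonical RIFF/WAVE header, bytes 24..27 hold the sample rate
// as a little-endian 32-bit unsigned integer.
uint32_t WavSampleRate(const std::vector<uint8_t>& header) {
  return static_cast<uint32_t>(header[24]) |
         static_cast<uint32_t>(header[25]) << 8 |
         static_cast<uint32_t>(header[26]) << 16 |
         static_cast<uint32_t>(header[27]) << 24;
}
```

If the returned rate differs from the configured one, resample the audio (or adjust `sr`) before running inference.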

- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)