diff --git a/fastdeploy/vision/classification/contrib/yolov5cls.h b/fastdeploy/vision/classification/contrib/yolov5cls.h
index 1e2ff3f99ea..bbf93e9e47c 100755
--- a/fastdeploy/vision/classification/contrib/yolov5cls.h
+++ b/fastdeploy/vision/classification/contrib/yolov5cls.h
@@ -44,7 +44,7 @@ class FASTDEPLOY_DECL YOLOv5Cls : public FastDeployModel {
   /** \brief Predict the classification result for an input image
    *
-   * \param[in] im The input image data, comes from cv::imread()
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
    * \param[in] result The output classification result will be writen to this structure
    * \param[in] topk Returns the topk classification result with the highest predicted probability, the default is 1
    * \return true if the prediction successed, otherwise false
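A minimal usage sketch for the Predict API documented above. It is illustrative, not part of the patch: the umbrella header `fastdeploy/vision.h`, the model path, and the `ClassifyResult` struct with its `Str()` helper are assumptions based on FastDeploy's public examples.

    #include <iostream>
    #include "fastdeploy/vision.h"

    int main() {
      // Hypothetical ONNX export of a YOLOv5 classification model.
      auto model =
          fastdeploy::vision::classification::YOLOv5Cls("yolov5n-cls.onnx");
      if (!model.Initialized()) return -1;
      cv::Mat im = cv::imread("test.jpg");  // HWC layout, BGR, as documented
      fastdeploy::vision::ClassifyResult result;
      if (!model.Predict(&im, &result, 5)) return -1;  // topk = 5
      std::cout << result.Str() << std::endl;
      return 0;
    }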
diff --git a/fastdeploy/vision/detection/contrib/nanodet_plus.h b/fastdeploy/vision/detection/contrib/nanodet_plus.h
index 57b472a0b56..34e353a730f 100644
--- a/fastdeploy/vision/detection/contrib/nanodet_plus.h
+++ b/fastdeploy/vision/detection/contrib/nanodet_plus.h
@@ -23,34 +23,51 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief NanoDetPlus model object used when loading a NanoDetPlus model exported by NanoDet.
+ */
 class FASTDEPLOY_DECL NanoDetPlus : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./nanodet_plus_320.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   NanoDetPlus(const std::string& model_file,
               const std::string& params_file = "",
               const RuntimeOption& custom_option = RuntimeOption(),
               const ModelFormat& model_format = ModelFormat::ONNX);
-
+  /// Get model's name
   std::string ModelName() const { return "nanodet"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.35
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.35f,
                        float nms_iou_threshold = 0.5f);

-  // tuple of input size (width, height), e.g (320, 320)
+  /// tuple of input size (width, height), e.g. (320, 320)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // keep aspect ratio or not when perform resize operation.
-  // This option is set as `false` by default in NanoDet-Plus.
+  /*! @brief
+  whether to keep the aspect ratio when performing the resize operation. This option is set to `false` by default in NanoDet-Plus
+  */
   bool keep_ratio;
-  // downsample strides for NanoDet-Plus to generate anchors, will
-  // take (8, 16, 32, 64) as default values.
+  /*! @brief
+  downsample strides for NanoDet-Plus to generate anchors, will take (8, 16, 32, 64) as default values
+  */
   std::vector<int> downsample_strides;
-  // for offseting the boxes by classes when using NMS, default 4096.
+  /// for offsetting the boxes by classes when using NMS, default 4096
   float max_wh;
-  // reg_max for GFL regression, default 7
+  /// reg_max for GFL regression, default 7
   int reg_max;

  private:
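The constructor's `custom_option` parameter is how backends and devices are selected. A hedged sketch (the `UseGpu()` call and `DetectionResult::Str()` are assumed from FastDeploy's public RuntimeOption and result APIs; the model path is illustrative):

    #include <iostream>
    #include "fastdeploy/vision.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.UseGpu();  // omit to keep the documented CPU default
      auto model = fastdeploy::vision::detection::NanoDetPlus(
          "nanodet_plus_320.onnx", "", option);
      cv::Mat im = cv::imread("test.jpg");
      fastdeploy::vision::DetectionResult result;
      // Thresholds mirror the documented defaults (0.35 / 0.5).
      if (!model.Predict(&im, &result, 0.35f, 0.5f)) return -1;
      std::cout << result.Str() << std::endl;
      return 0;
    }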
diff --git a/fastdeploy/vision/detection/contrib/scaledyolov4.h b/fastdeploy/vision/detection/contrib/scaledyolov4.h
index b2ce5d69776..963c892ec96 100644
--- a/fastdeploy/vision/detection/contrib/scaledyolov4.h
+++ b/fastdeploy/vision/detection/contrib/scaledyolov4.h
@@ -20,35 +20,53 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief ScaledYOLOv4 model object used when loading a ScaledYOLOv4 model exported by ScaledYOLOv4.
+ */
 class FASTDEPLOY_DECL ScaledYOLOv4 : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./scaled_yolov4.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
+
   ScaledYOLOv4(const std::string& model_file,
                const std::string& params_file = "",
                const RuntimeOption& custom_option = RuntimeOption(),
                const ModelFormat& model_format = ModelFormat::ONNX);

   virtual std::string ModelName() const { return "ScaledYOLOv4"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.25,
                        float nms_iou_threshold = 0.5);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;
-  // for offseting the boxes by classes when using NMS
+  /// for offsetting the boxes by classes when using NMS
   float max_wh;

  private:
@@ -70,7 +88,7 @@ class FASTDEPLOY_DECL ScaledYOLOv4 : public FastDeployModel {
   // or not.)
   // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This
   // value will
-  // auto check by fastdeploy after the internal Runtime already initialized.
+  // auto check by fastdeploy after the internal Runtime already initialized
   bool is_dynamic_input_;
 };
 }  // namespace detection
diff --git a/fastdeploy/vision/detection/contrib/yolor.h b/fastdeploy/vision/detection/contrib/yolor.h
index f979b2dd121..af6ebf46f86 100644
--- a/fastdeploy/vision/detection/contrib/yolor.h
+++ b/fastdeploy/vision/detection/contrib/yolor.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.  //NOLINT
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,34 +20,51 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief YOLOR model object used when loading a YOLOR model exported by YOLOR.
+ */
 class FASTDEPLOY_DECL YOLOR : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolor.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOR(const std::string& model_file, const std::string& params_file = "",
         const RuntimeOption& custom_option = RuntimeOption(),
         const ModelFormat& model_format = ModelFormat::ONNX);

   virtual std::string ModelName() const { return "YOLOR"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.25,
                        float nms_iou_threshold = 0.5);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;
-  // for offseting the boxes by classes when using NMS
+  /// for offsetting the boxes by classes when using NMS
   float max_wh;

  private:
@@ -72,6 +89,7 @@ class FASTDEPLOY_DECL YOLOR : public FastDeployModel {
   // auto check by fastdeploy after the internal Runtime already initialized.
   bool is_dynamic_input_;
 };
+
 }  // namespace detection
 }  // namespace vision
 }  // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5.h
index 198a38d4b45..e899ad4c547 100644
--- a/fastdeploy/vision/detection/contrib/yolov5.h
+++ b/fastdeploy/vision/detection/contrib/yolov5.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.  //NOLINT
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -21,9 +21,17 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief YOLOv5 model object used when loading a YOLOv5 model exported by YOLOv5.
+ */
 class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolov5.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOv5(const std::string& model_file, const std::string& params_file = "",
          const RuntimeOption& custom_option = RuntimeOption(),
          const ModelFormat& model_format = ModelFormat::ONNX);
@@ -31,7 +39,14 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel {
   ~YOLOv5();

   std::string ModelName() const { return "yolov5"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.25,
                        float nms_iou_threshold = 0.5);
@@ -62,23 +77,25 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel {
                   float conf_threshold, float nms_iou_threshold,
                   bool multi_label, float max_wh = 7680.0);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size_;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value_;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad_;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad_;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up_;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride_;
-  // for offseting the boxes by classes when using NMS
+  /// for offsetting the boxes by classes when using NMS
   float max_wh_;
-  // for different strategies to get boxes when postprocessing
+  /// for different strategies to get boxes when postprocessing
   bool multi_label_;

  private:
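The letterbox-related members above carry Doxygen `///` markup, which suggests they are public tuning knobs; assuming so, they can be adjusted between construction and Predict(). A sketch under that assumption (model path illustrative):

    #include "fastdeploy/vision.h"

    int main() {
      auto model = fastdeploy::vision::detection::YOLOv5("yolov5s.onnx");
      model.size_ = {1280, 1280};  // larger letterbox target (width, height)
      model.multi_label_ = false;  // single-label postprocessing strategy
      cv::Mat im = cv::imread("test.jpg");
      fastdeploy::vision::DetectionResult result;
      return model.Predict(&im, &result) ? 0 : -1;
    }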
diff --git a/fastdeploy/vision/detection/contrib/yolov5lite.h b/fastdeploy/vision/detection/contrib/yolov5lite.h
index 711880115d3..63717b01ce8 100644
--- a/fastdeploy/vision/detection/contrib/yolov5lite.h
+++ b/fastdeploy/vision/detection/contrib/yolov5lite.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.  //NOLINT
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,9 +20,17 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief YOLOv5Lite model object used when loading a YOLOv5Lite model exported by YOLOv5Lite.
+ */
 class FASTDEPLOY_DECL YOLOv5Lite : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolov5lite.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOv5Lite(const std::string& model_file,
              const std::string& params_file = "",
             const RuntimeOption& custom_option = RuntimeOption(),
             const ModelFormat& model_format = ModelFormat::ONNX);
@@ -30,41 +38,55 @@ class FASTDEPLOY_DECL YOLOv5Lite : public FastDeployModel {
   ~YOLOv5Lite();

   virtual std::string ModelName() const { return "YOLOv5-Lite"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.45
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.25
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.45,
                        float nms_iou_threshold = 0.25);
+
   void UseCudaPreprocessing(int max_img_size = 3840 * 2160);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;
-  // for offseting the boxes by classes when using NMS
+  /// for offsetting the boxes by classes when using NMS
   float max_wh;
-  // downsample strides for YOLOv5Lite to generate anchors, will take
-  // (8,16,32) as default values, might have stride=64.
+  /*! @brief
+  downsample strides for YOLOv5Lite to generate anchors, will take (8, 16, 32) as default values, and may also include stride 64
+  */
   std::vector<int> downsample_strides;
-  // anchors parameters, downsample_strides will take
-  // (8,16,32), each stride has three anchors with width and hight.
+  /*! @brief
+  anchor parameters; downsample_strides will take (8, 16, 32), and each stride has three anchors with width and height
+  */
   std::vector<std::vector<float>> anchor_config;
-  // whether the model_file was exported with decode module. The official
-  // YOLOv5Lite/export.py script will export ONNX file without
-  // decode module. Please set it 'true' manually if the model file
-  // was exported with decode module.
-  // false : ONNX files without decode module.
-  // true : ONNX file with decode module.
+  /*! @brief
+  whether the model_file was exported with the decode module. The official
+  YOLOv5Lite/export.py script will export an ONNX file without the
+  decode module. Please set it to 'true' manually if the model file
+  was exported with the decode module.
+  false: ONNX file without decode module.
+  true: ONNX file with decode module.
+  */
   bool is_decode_exported;

  private:
@@ -83,6 +105,7 @@ class FASTDEPLOY_DECL YOLOv5Lite : public FastDeployModel {
   bool Preprocess(Mat* mat, FDTensor* output,
                   std::map<std::string, std::array<float, 4>>* im_info);
+
   bool CudaPreprocess(Mat* mat, FDTensor* output,
                       std::map<std::string, std::array<float, 4>>* im_info);
diff --git a/fastdeploy/vision/detection/contrib/yolov6.h b/fastdeploy/vision/detection/contrib/yolov6.h
index 68a224c84a8..b90197a2e1a 100644
--- a/fastdeploy/vision/detection/contrib/yolov6.h
+++ b/fastdeploy/vision/detection/contrib/yolov6.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.  //NOLINT
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,9 +23,17 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief YOLOv6 model object used when loading a YOLOv6 model exported by YOLOv6.
+ */
 class FASTDEPLOY_DECL YOLOv6 : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolov6.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOv6(const std::string& model_file, const std::string& params_file = "",
          const RuntimeOption& custom_option = RuntimeOption(),
          const ModelFormat& model_format = ModelFormat::ONNX);
@@ -33,29 +41,40 @@ class FASTDEPLOY_DECL YOLOv6 : public FastDeployModel {
   ~YOLOv6();

   std::string ModelName() const { return "YOLOv6"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.25,
                        float nms_iou_threshold = 0.5);
+
   void UseCudaPreprocessing(int max_img_size = 3840 * 2160);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;
-  // for offseting the boxes by classes when using NMS, default 4096 in
-  // meituan/YOLOv6
+  /*! @brief
+  for offsetting the boxes by classes when using NMS, default 4096 in meituan/YOLOv6
+  */
   float max_wh;

  private:
diff --git a/fastdeploy/vision/detection/contrib/yolov7.h b/fastdeploy/vision/detection/contrib/yolov7.h
index 872ff8dda04..e7921061d86 100644
--- a/fastdeploy/vision/detection/contrib/yolov7.h
+++ b/fastdeploy/vision/detection/contrib/yolov7.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.  //NOLINT
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,9 +20,17 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief YOLOv7 model object used when loading a YOLOv7 model exported by YOLOv7.
+ */
 class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolov7.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOv7(const std::string& model_file, const std::string& params_file = "",
          const RuntimeOption& custom_option = RuntimeOption(),
          const ModelFormat& model_format = ModelFormat::ONNX);
@@ -30,28 +38,38 @@ class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel {
   ~YOLOv7();

   virtual std::string ModelName() const { return "yolov7"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.25,
                        float nms_iou_threshold = 0.5);
+
   void UseCudaPreprocessing(int max_img_size = 3840 * 2160);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;
-  // for offseting the boxes by classes when using NMS
+  /// for offsetting the boxes by classes when using NMS
   float max_wh;

  private:
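`UseCudaPreprocessing()` appears in several of these headers; its `max_img_size` argument presumably sizes the preallocated GPU staging buffer (the default covers a 3840x2160 input — an assumption, not stated in the patch). A hedged sketch pairing it with a GPU runtime:

    #include "fastdeploy/vision.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.UseGpu(0);  // CUDA preprocessing only makes sense on GPU
      auto model =
          fastdeploy::vision::detection::YOLOv7("yolov7.onnx", "", option);
      model.UseCudaPreprocessing(3840 * 2160);  // same cap as the default
      cv::Mat im = cv::imread("test.jpg");
      fastdeploy::vision::DetectionResult result;
      return model.Predict(&im, &result, 0.25f, 0.5f) ? 0 : -1;
    }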
diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.h b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.h
index f8cfef828ba..6e0c0d578b4 100644
--- a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.h
+++ b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.  //NOLINT
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,32 +20,48 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief YOLOv7End2EndORT model object used when loading a YOLOv7End2EndORT model exported by YOLOv7.
+ */
 class FASTDEPLOY_DECL YOLOv7End2EndORT : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolov7end2end_ort.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOv7End2EndORT(const std::string& model_file,
                    const std::string& params_file = "",
                    const RuntimeOption& custom_option = RuntimeOption(),
                    const ModelFormat& model_format = ModelFormat::ONNX);

   virtual std::string ModelName() const { return "yolov7end2end_ort"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.25);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;

  private:
diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.h b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.h
index 7398679dd6a..3a2494d4934 100644
--- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.h
+++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.h
@@ -20,9 +20,17 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief YOLOv7End2EndTRT model object used when loading a YOLOv7End2EndTRT model exported by YOLOv7.
+ */
 class FASTDEPLOY_DECL YOLOv7End2EndTRT : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolov7end2end_trt.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOv7End2EndTRT(const std::string& model_file,
                    const std::string& params_file = "",
                    const RuntimeOption& custom_option = RuntimeOption(),
@@ -31,25 +39,34 @@ class FASTDEPLOY_DECL YOLOv7End2EndTRT : public FastDeployModel {
   ~YOLOv7End2EndTRT();

   virtual std::string ModelName() const { return "yolov7end2end_trt"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.25);
+
   void UseCudaPreprocessing(int max_img_size = 3840 * 2160);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;

  private:
diff --git a/fastdeploy/vision/detection/contrib/yolox.h b/fastdeploy/vision/detection/contrib/yolox.h
index 2c6c3960812..07522832cc0 100644
--- a/fastdeploy/vision/detection/contrib/yolox.h
+++ b/fastdeploy/vision/detection/contrib/yolox.h
@@ -23,32 +23,50 @@ namespace fastdeploy {
 namespace vision {
 namespace detection {
-
+/*! @brief YOLOX model object used when loading a YOLOX model exported by YOLOX.
+ */
 class FASTDEPLOY_DECL YOLOX : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolox.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOX(const std::string& model_file, const std::string& params_file = "",
         const RuntimeOption& custom_option = RuntimeOption(),
         const ModelFormat& model_format = ModelFormat::ONNX);

   std::string ModelName() const { return "YOLOX"; }
-
+  /** \brief Predict the detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, DetectionResult* result,
                        float conf_threshold = 0.25,
                        float nms_iou_threshold = 0.5);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // whether the model_file was exported with decode module. The official
-  // YOLOX/tools/export_onnx.py script will export ONNX file without
-  // decode module. Please set it 'true' manually if the model file
-  // was exported with decode module.
+  /*! @brief
+  whether the model_file was exported with the decode module. The official
+  YOLOX/tools/export_onnx.py script will export an ONNX file without the
+  decode module. Please set it to 'true' manually if the model file
+  was exported with the decode module.
+  */
   bool is_decode_exported;
-  // downsample strides for YOLOX to generate anchors, will take
-  // (8,16,32) as default values, might have stride=64.
+  /*! @brief
+  downsample strides for YOLOX to generate anchors, will take (8, 16, 32) as default values, and may also include stride 64
+  */
   std::vector<int> downsample_strides;
-  // for offseting the boxes by classes when using NMS, default 4096.
+  /// for offsetting the boxes by classes when using NMS, default 4096
   float max_wh;

  private:
diff --git a/fastdeploy/vision/detection/ppdet/ppyoloe.h b/fastdeploy/vision/detection/ppdet/ppyoloe.h
index f7f6da7796a..fd2a71cb171 100644
--- a/fastdeploy/vision/detection/ppdet/ppyoloe.h
+++ b/fastdeploy/vision/detection/ppdet/ppyoloe.h
@@ -48,7 +48,7 @@ class FASTDEPLOY_DECL PPYOLOE : public FastDeployModel {
   /** \brief Predict the detection result for an input image
    *
-   * \param[in] im The input image data, comes from cv::imread()
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
    * \param[in] result The output detection result will be writen to this structure
    * \return true if the prediction successed, otherwise false
    */
diff --git a/fastdeploy/vision/facedet/contrib/retinaface.h b/fastdeploy/vision/facedet/contrib/retinaface.h
index bd0cdefcf40..33708586a94 100644
--- a/fastdeploy/vision/facedet/contrib/retinaface.h
+++ b/fastdeploy/vision/facedet/contrib/retinaface.h
@@ -20,32 +20,51 @@ namespace fastdeploy {
 namespace vision {
-
+/** \brief All face detection model APIs are defined inside this namespace
+ *
+ */
 namespace facedet {
-
+/*! @brief RetinaFace model object used when loading a RetinaFace model exported by RetinaFace.
+ */
 class FASTDEPLOY_DECL RetinaFace : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./retinaface.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   RetinaFace(const std::string& model_file,
              const std::string& params_file = "",
             const RuntimeOption& custom_option = RuntimeOption(),
             const ModelFormat& model_format = ModelFormat::ONNX);

   std::string ModelName() const { return "Pytorch_Retinaface"; }
-
+  /** \brief Predict the face detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output face detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.4
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
                        float conf_threshold = 0.25f,
                        float nms_iou_threshold = 0.4f);

-  // tuple of (width, height), default (640, 640)
+  /// tuple of (width, height), default (640, 640)
   std::vector<int> size;
-  // variance in RetinaFace's prior-box(anchor) generate process,
-  // default (0.1, 0.2)
+  /*! @brief
+  variance in RetinaFace's prior-box (anchor) generation process, default (0.1, 0.2)
+  */
   std::vector<float> variance;
-  // downsample strides (namely, steps) for RetinaFace to
-  // generate anchors, will take (8,16,32) as default values.
+  /*! @brief
+  downsample strides (namely, steps) for RetinaFace to generate anchors, will take (8, 16, 32) as default values
+  */
   std::vector<int> downsample_strides;
-  // min sizes, width and height for each anchor.
+  /// min sizes, width and height for each anchor
   std::vector<std::vector<int>> min_sizes;
-  // landmarks_per_face, default 5 in RetinaFace
+  /// landmarks_per_face, default 5 in RetinaFace
   int landmarks_per_face;

  private:
diff --git a/fastdeploy/vision/facedet/contrib/scrfd.h b/fastdeploy/vision/facedet/contrib/scrfd.h
index 0323e982a09..964e9ac494d 100644
--- a/fastdeploy/vision/facedet/contrib/scrfd.h
+++ b/fastdeploy/vision/facedet/contrib/scrfd.h
@@ -23,44 +23,61 @@ namespace fastdeploy {
 namespace vision {
 namespace facedet {
-
+/*! @brief SCRFD model object used when loading an SCRFD model exported by SCRFD.
+ */
 class FASTDEPLOY_DECL SCRFD : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./scrfd.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   SCRFD(const std::string& model_file, const std::string& params_file = "",
         const RuntimeOption& custom_option = RuntimeOption(),
         const ModelFormat& model_format = ModelFormat::ONNX);

   std::string ModelName() const { return "scrfd"; }
-
+  /** \brief Predict the face detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output face detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.4
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
                        float conf_threshold = 0.25f,
                        float nms_iou_threshold = 0.4f);

-  // tuple of (width, height), default (640, 640)
+  /// tuple of (width, height), default (640, 640)
   std::vector<int> size;
-  // downsample strides (namely, steps) for SCRFD to
-  // generate anchors, will take (8,16,32) as default values.
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;
-  // for offseting the boxes by classes when using NMS
+  /*! @brief
+  downsample strides (namely, steps) for SCRFD to generate anchors, will take (8, 16, 32) as default values
+  */
   std::vector<int> downsample_strides;
-  // landmarks_per_face, default 5 in SCRFD
+  /// landmarks_per_face, default 5 in SCRFD
   int landmarks_per_face;
-  // are the outputs of onnx file with key points features or not
+  /// whether the ONNX model outputs key-point features or not
   bool use_kps;
-  // the upperbond number of boxes processed by nms.
+  /// the upper bound on the number of boxes processed by NMS
   int max_nms;
-  // number anchors of each stride
+  /// number of anchors for each stride
   unsigned int num_anchors;

  private:
diff --git a/fastdeploy/vision/facedet/contrib/ultraface.h b/fastdeploy/vision/facedet/contrib/ultraface.h
index 8627f49aa5f..ebe1da986b5 100644
--- a/fastdeploy/vision/facedet/contrib/ultraface.h
+++ b/fastdeploy/vision/facedet/contrib/ultraface.h
@@ -22,9 +22,17 @@ namespace fastdeploy {
 namespace vision {
 namespace facedet {
-
+/*! @brief UltraFace model object used when loading an UltraFace model exported by UltraFace.
+ */
 class FASTDEPLOY_DECL UltraFace : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./ultraface.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   UltraFace(const std::string& model_file,
             const std::string& params_file = "",
            const RuntimeOption& custom_option = RuntimeOption(),
            const ModelFormat& model_format = ModelFormat::ONNX);
@@ -32,12 +40,19 @@ class FASTDEPLOY_DECL UltraFace : public FastDeployModel {
   std::string ModelName() const {
     return "Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB";
   }
-
+  /** \brief Predict the face detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output face detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.7
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.3
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
                        float conf_threshold = 0.7f,
                        float nms_iou_threshold = 0.3f);

-  // tuple of (width, height), default (320, 240)
+  /// tuple of (width, height), default (320, 240)
   std::vector<int> size;

  private:
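Face detection follows the same call pattern with `FaceDetectionResult`; when `use_kps` is true, the result additionally carries `landmarks_per_face` keypoints per detected face. A sketch (the model filename is illustrative, and the landmark behavior is inferred from the member docs above):

    #include <iostream>
    #include "fastdeploy/vision.h"

    int main() {
      auto model = fastdeploy::vision::facedet::SCRFD("scrfd_640x640.onnx");
      cv::Mat im = cv::imread("faces.jpg");
      fastdeploy::vision::FaceDetectionResult result;
      // Documented defaults: conf_threshold 0.25, nms_iou_threshold 0.4.
      if (!model.Predict(&im, &result, 0.25f, 0.4f)) return -1;
      std::cout << result.Str() << std::endl;
      return 0;
    }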
diff --git a/fastdeploy/vision/facedet/contrib/yolov5face.h b/fastdeploy/vision/facedet/contrib/yolov5face.h
index 42357b3eac4..921d68cce7b 100644
--- a/fastdeploy/vision/facedet/contrib/yolov5face.h
+++ b/fastdeploy/vision/facedet/contrib/yolov5face.h
@@ -22,36 +22,57 @@ namespace fastdeploy {
 namespace vision {
 namespace facedet {
-
+/*! @brief YOLOv5Face model object used when loading a YOLOv5Face model exported by YOLOv5Face.
+ */
 class FASTDEPLOY_DECL YOLOv5Face : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./yolov5face.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   YOLOv5Face(const std::string& model_file,
              const std::string& params_file = "",
             const RuntimeOption& custom_option = RuntimeOption(),
             const ModelFormat& model_format = ModelFormat::ONNX);

   std::string ModelName() const { return "yolov5-face"; }
-
+  /** \brief Predict the face detection result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output face detection result will be written to this structure
+   * \param[in] conf_threshold confidence threshold for postprocessing, default is 0.25
+   * \param[in] nms_iou_threshold IoU threshold for NMS, default is 0.5
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
                        float conf_threshold = 0.25,
                        float nms_iou_threshold = 0.5);

-  // tuple of (width, height)
+  /// tuple of (width, height)
   std::vector<int> size;
-  // padding value, size should be same with Channels
+  /// padding value, size should be the same as the number of channels
   std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
+  /// only pad to the minimum rectangle whose height and width are multiples of the stride
   bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
+  /*! @brief
+  when is_mini_pad = false and is_no_pad = true, the image will be resized to the set size
+  */
+
   bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
+  /*! @brief
+  if is_scale_up is false, the input image can only be zoomed out, i.e. the maximum resize scale cannot exceed 1.0
+  */
+
   bool is_scale_up;
-  // padding stride, for is_mini_pad
+  /// padding stride, for is_mini_pad
   int stride;
-  // setup the number of landmarks for per face (if have), default 5 in
-  // official yolov5face note that, the outupt tensor's shape must be:
-  // (1,n,4+1+2*landmarks_per_face+1=box+obj+landmarks+cls)
+  /*! @brief
+  set the number of landmarks per face (if any), default 5 in
+  official yolov5face. Note that the output tensor's shape must be:
+  (1,n,4+1+2*landmarks_per_face+1=box+obj+landmarks+cls)
+  */
   int landmarks_per_face;

  private:
diff --git a/fastdeploy/vision/faceid/contrib/adaface.h b/fastdeploy/vision/faceid/contrib/adaface.h
index 0592138a36e..a11d612989e 100644
--- a/fastdeploy/vision/faceid/contrib/adaface.h
+++ b/fastdeploy/vision/faceid/contrib/adaface.h
@@ -21,11 +21,21 @@ namespace fastdeploy {
 namespace vision {
-
+/** \brief All face recognition model APIs are defined inside this namespace
+ *
+ */
 namespace faceid {
-
+/*! @brief AdaFace model object used when loading an AdaFace model exported by AdaFacePaddleCLas.
+ */
 class FASTDEPLOY_DECL AdaFace : public InsightFaceRecognitionModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./adaface.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is PADDLE format
+   */
   AdaFace(const std::string& model_file, const std::string& params_file = "",
           const RuntimeOption& custom_option = RuntimeOption(),
           const ModelFormat& model_format = ModelFormat::PADDLE);
@@ -33,7 +43,12 @@ class FASTDEPLOY_DECL AdaFace : public InsightFaceRecognitionModel {
   std::string ModelName() const override {
     return "Zheng-Bicheng/AdaFacePaddleCLas";
   }
-
+  /** \brief Predict the face recognition result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output face recognition result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
   bool Predict(cv::Mat* im, FaceRecognitionResult* result) override;

  private:
diff --git a/fastdeploy/vision/faceid/contrib/arcface.h b/fastdeploy/vision/faceid/contrib/arcface.h
index c5bd0f0644a..05478fc6691 100644
--- a/fastdeploy/vision/faceid/contrib/arcface.h
+++ b/fastdeploy/vision/faceid/contrib/arcface.h
@@ -23,9 +23,17 @@ namespace fastdeploy {
 namespace vision {
 namespace faceid {
-
+/*! @brief ArcFace model object used when loading an ArcFace model exported by InsightFace.
+ */
 class FASTDEPLOY_DECL ArcFace : public InsightFaceRecognitionModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./arcface.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   ArcFace(const std::string& model_file, const std::string& params_file = "",
           const RuntimeOption& custom_option = RuntimeOption(),
           const ModelFormat& model_format = ModelFormat::ONNX);
@@ -33,7 +41,12 @@ class FASTDEPLOY_DECL ArcFace : public InsightFaceRecognitionModel {
   std::string ModelName() const override {
     return "deepinsight/insightface/recognition/arcface_pytorch";
   }
-
+  /** \brief Predict the face recognition result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output face recognition result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
   bool Predict(cv::Mat* im, FaceRecognitionResult* result) override;

  private:
diff --git a/fastdeploy/vision/faceid/contrib/cosface.h b/fastdeploy/vision/faceid/contrib/cosface.h
index d7fc273563a..dbf2e4ac615 100644
--- a/fastdeploy/vision/faceid/contrib/cosface.h
+++ b/fastdeploy/vision/faceid/contrib/cosface.h
@@ -23,9 +23,17 @@ namespace fastdeploy {
 namespace vision {
 namespace faceid {
-
+/*! @brief CosFace model object used when loading a CosFace model exported by InsightFace.
+ */
 class FASTDEPLOY_DECL CosFace : public InsightFaceRecognitionModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./cosface.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   CosFace(const std::string& model_file, const std::string& params_file = "",
           const RuntimeOption& custom_option = RuntimeOption(),
           const ModelFormat& model_format = ModelFormat::ONNX);
@@ -33,7 +41,12 @@ class FASTDEPLOY_DECL CosFace : public InsightFaceRecognitionModel {
   std::string ModelName() const override {
     return "deepinsight/insightface/recognition/arcface_pytorch";
   }
-
+  /** \brief Predict the face recognition result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output face recognition result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
   bool Predict(cv::Mat* im, FaceRecognitionResult* result) override;

  private:
diff --git a/fastdeploy/vision/faceid/contrib/insightface_rec.h b/fastdeploy/vision/faceid/contrib/insightface_rec.h
index 2e4b970f9dc..e12765a20c3 100644
--- a/fastdeploy/vision/faceid/contrib/insightface_rec.h
+++ b/fastdeploy/vision/faceid/contrib/insightface_rec.h
@@ -22,9 +22,17 @@ namespace fastdeploy {
 namespace vision {
 namespace faceid {
-
+/*! @brief InsightFaceRecognitionModel object, the base class shared by InsightFace recognition models such as ArcFace and CosFace.
+ */
 class FASTDEPLOY_DECL InsightFaceRecognitionModel : public FastDeployModel {
  public:
+  /** \brief Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g. ./arcface.onnx
+   * \param[in] params_file Path of parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference; the default will use CPU and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is ONNX format
+   */
   InsightFaceRecognitionModel(
       const std::string& model_file, const std::string& params_file = "",
       const RuntimeOption& custom_option = RuntimeOption(),
       const ModelFormat& model_format = ModelFormat::ONNX);
@@ -32,15 +40,22 @@ class FASTDEPLOY_DECL InsightFaceRecognitionModel : public FastDeployModel {

   virtual std::string ModelName() const { return "deepinsight/insightface"; }

-  // tuple of (width, height), default (112, 112)
+  /// tuple of (width, height), default (112, 112)
   std::vector<int> size;
+  /// alpha values for normalization
   std::vector<float> alpha;
+  /// beta values for normalization
   std::vector<float> beta;
-  // whether to swap the B and R channel, such as BGR->RGB, default true.
+  /// whether to swap the B and R channel, such as BGR->RGB, default true
   bool swap_rb;
-  // whether to apply l2 normalize to embedding values, default;
+  /// whether to apply l2 normalization to the embedding values
   bool l2_normalize;
-
+  /** \brief Predict the face recognition result for an input image
+   *
+   * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output face recognition result will be written to this structure
+   * \return true if the prediction succeeded, otherwise false
+   */
   virtual bool Predict(cv::Mat* im, FaceRecognitionResult* result);

   virtual bool Initialize();
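A typical use of `FaceRecognitionResult` is comparing two embeddings by cosine similarity. A sketch (it assumes the result exposes its feature vector as `std::vector<float> embedding`, per FastDeploy's result structs; the model path is illustrative):

    #include <cmath>
    #include <iostream>
    #include <numeric>
    #include "fastdeploy/vision.h"

    // Cosine similarity between two embedding vectors of equal length.
    static float CosineSimilarity(const std::vector<float>& a,
                                  const std::vector<float>& b) {
      float dot = std::inner_product(a.begin(), a.end(), b.begin(), 0.0f);
      float na = std::sqrt(std::inner_product(a.begin(), a.end(), a.begin(), 0.0f));
      float nb = std::sqrt(std::inner_product(b.begin(), b.end(), b.begin(), 0.0f));
      return dot / (na * nb);
    }

    int main() {
      auto model = fastdeploy::vision::faceid::ArcFace("arcface.onnx");
      model.l2_normalize = true;  // compare normalized embeddings
      cv::Mat a = cv::imread("face_a.jpg"), b = cv::imread("face_b.jpg");
      fastdeploy::vision::FaceRecognitionResult ra, rb;
      if (!model.Predict(&a, &ra) || !model.Predict(&b, &rb)) return -1;
      std::cout << CosineSimilarity(ra.embedding, rb.embedding) << std::endl;
      return 0;
    }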
+ * + * \param[in] model_file Path of model file, e.g ./vpl.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends" + * \param[in] model_format Model format of the loaded model, default is ONNX format + */ VPL(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); @@ -33,7 +41,12 @@ class FASTDEPLOY_DECL VPL : public InsightFaceRecognitionModel { std::string ModelName() const override { return "deepinsight/insightface/recognition/vpl"; } - + /** \brief Predict the face recognition result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format + * \param[in] result The output face recognition result will be written to this structure + * \return true if the prediction succeeded, otherwise false + */ bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; private: diff --git a/fastdeploy/vision/matting/contrib/modnet.h b/fastdeploy/vision/matting/contrib/modnet.h index a50277205f2..3e53e1ba6f9 100644 --- a/fastdeploy/vision/matting/contrib/modnet.h +++ b/fastdeploy/vision/matting/contrib/modnet.h @@ -22,22 +22,37 @@ namespace fastdeploy { namespace vision { namespace matting { - +/*! @brief MODNet model object used to load a MODNet model exported by MODNet. + */ class FASTDEPLOY_DECL MODNet : public FastDeployModel { public: + /** \brief Set path of model file and the configuration of runtime. + * + * \param[in] model_file Path of model file, e.g ./modnet.onnx + * \param[in] params_file Path of parameter file, e.g ppyoloe/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends" + * \param[in] model_format Model format of the loaded model, default is ONNX format + */ MODNet(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); std::string ModelName() const { return "matting/MODNet"; } - // tuple of (width, height), default (256, 256) + /// tuple of (width, height), default (256, 256) std::vector<int> size; + /// alpha values for normalization std::vector<float> alpha; + /// beta values for normalization std::vector<float> beta; - // whether to swap the B and R channel, such as BGR->RGB, default true. + /// whether to swap the B and R channel, such as BGR->RGB, default true.
bool swap_rb; - + /** \brief Predict the matting result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format + * \param[in] result The output matting result will be written to this structure + * \return true if the prediction succeeded, otherwise false + */ bool Predict(cv::Mat* im, MattingResult* result); private: diff --git a/fastdeploy/vision/matting/ppmatting/ppmatting.h b/fastdeploy/vision/matting/ppmatting/ppmatting.h index 8cb382595b8..864507f6b65 100644 --- a/fastdeploy/vision/matting/ppmatting/ppmatting.h +++ b/fastdeploy/vision/matting/ppmatting/ppmatting.h @@ -18,23 +18,36 @@ namespace fastdeploy { namespace vision { +/** \brief All object matting model APIs are defined inside this namespace + * + */ namespace matting { - +/*! @brief PPMatting model object used to load a PPMatting model exported by PPMatting. + */ class FASTDEPLOY_DECL PPMatting : public FastDeployModel { public: + /** \brief Set path of model file and configuration file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g PPMatting-512/model.pdmodel + * \param[in] params_file Path of parameter file, e.g PPMatting-512/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g PPMatting-512/infer_cfg.yml + * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + */ PPMatting(const std::string& model_file, const std::string& params_file, const std::string& config_file, const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE); std::string ModelName() const { return "PaddleMatting"; } - + /** \brief Predict the matting result for an input image + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format + * \param[in] result The output matting result will be written to this structure + * \return true if the prediction succeeded, otherwise false + */ virtual bool Predict(cv::Mat* im, MattingResult* result); - bool with_softmax = false; - - bool is_vertical_screen = false; - private: bool Initialize(); diff --git a/fastdeploy/vision/ocr/ppocr/classifier.h b/fastdeploy/vision/ocr/ppocr/classifier.h index f810f98a376..d87fec6fa6b 100644 --- a/fastdeploy/vision/ocr/ppocr/classifier.h +++ b/fastdeploy/vision/ocr/ppocr/classifier.h @@ -43,7 +43,7 @@ class FASTDEPLOY_DECL Classifier : public FastDeployModel { std::string ModelName() const { return "ppocr/ocr_cls"; } /** \brief Predict the input image and get OCR classification model result. * - * \param[in] im The input image data, comes from cv::imread(). + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. * \param[in] result The output of OCR classification model result will be writen to this structure. * \return true if the prediction is successed, otherwise false.
*/ diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.h b/fastdeploy/vision/ocr/ppocr/dbdetector.h index 53bf3aceec6..e0baf319cb3 100644 --- a/fastdeploy/vision/ocr/ppocr/dbdetector.h +++ b/fastdeploy/vision/ocr/ppocr/dbdetector.h @@ -44,7 +44,7 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel { std::string ModelName() const { return "ppocr/ocr_det"; } /** \brief Predict the input image and get OCR detection model result. * - * \param[in] im The input image data, comes from cv::imread(). + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. * \param[in] boxes_result The output of OCR detection model result will be writen to this structure. * \return true if the prediction is successed, otherwise false. */ diff --git a/fastdeploy/vision/ocr/ppocr/ppocr_v2.h b/fastdeploy/vision/ocr/ppocr/ppocr_v2.h index 88d3ee1a315..bf53000202c 100644 --- a/fastdeploy/vision/ocr/ppocr/ppocr_v2.h +++ b/fastdeploy/vision/ocr/ppocr/ppocr_v2.h @@ -54,7 +54,7 @@ class FASTDEPLOY_DECL PPOCRv2 : public FastDeployModel { /** \brief Predict the input image and get OCR result. * - * \param[in] im The input image data, comes from cv::imread(). + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. * \param[in] result The output OCR result will be writen to this structure. * \return true if the prediction successed, otherwise false. */ diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.h b/fastdeploy/vision/ocr/ppocr/recognizer.h index 3ab6731ba46..d3c5fcc9d40 100644 --- a/fastdeploy/vision/ocr/ppocr/recognizer.h +++ b/fastdeploy/vision/ocr/ppocr/recognizer.h @@ -45,7 +45,7 @@ class FASTDEPLOY_DECL Recognizer : public FastDeployModel { std::string ModelName() const { return "ppocr/ocr_rec"; } /** \brief Predict the input image and get OCR recognition model result. * - * \param[in] im The input image data, comes from cv::imread(). + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. * \param[in] rec_result The output of OCR recognition model result will be writen to this structure. * \return true if the prediction is successed, otherwise false. */ diff --git a/fastdeploy/vision/segmentation/ppseg/model.h b/fastdeploy/vision/segmentation/ppseg/model.h index c81eebc41fa..1ae8b9b2449 100644 --- a/fastdeploy/vision/segmentation/ppseg/model.h +++ b/fastdeploy/vision/segmentation/ppseg/model.h @@ -46,7 +46,7 @@ class FASTDEPLOY_DECL PaddleSegModel : public FastDeployModel { /** \brief Predict the segmentation result for an input image * - * \param[in] im The input image data, comes from cv::imread() + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format * \param[in] result The output segmentation result will be writen to this structure * \return true if the segmentation prediction successed, otherwise false */
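All of the Predict overloads documented in this change share one contract: a 3-D HWC, BGR cv::Mat in, a result structure out, and a bool status back. A minimal usage sketch of that contract (assuming the umbrella header fastdeploy/vision.h; the model path cosface.onnx and image path face.jpg below are illustrative only):

#include <iostream>
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

int main() {
  // Construct with the documented defaults: CPU runtime and ONNX format;
  // the model path is illustrative.
  fastdeploy::vision::faceid::CosFace model("cosface.onnx");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the model." << std::endl;
    return -1;
  }

  // Predict expects a 3-D array with layout HWC, BGR format,
  // which is exactly what cv::imread() returns.
  cv::Mat im = cv::imread("face.jpg");
  fastdeploy::vision::FaceRecognitionResult result;
  if (!model.Predict(&im, &result)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << result.Str() << std::endl;
  return 0;
}

The matting, OCR and segmentation models documented above follow the same pattern; PPMatting differs only in that its constructor additionally requires params_file and config_file, as its signature shows.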