diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c1bfdde744..a030bddd004 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,22 +37,24 @@ if(NOT MSVC) endif(NOT MSVC) #############################CMAKE FOR FASTDEPLOY################################ -option(ENABLE_PADDLE_FRONTEND "if to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON) -option(WITH_GPU "if WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu" OFF) -option(ENABLE_ORT_BACKEND "if to enable onnxruntime backend." OFF) -option(ENABLE_TRT_BACKEND "if to enable tensorrt backend." OFF) -option(ENABLE_PADDLE_BACKEND "if to enable paddle backend." OFF) -option(CUDA_DIRECTORY "if build tensorrt backend, need to define path of cuda library.") -option(TRT_DIRECTORY "if build tensorrt backend, need to define path of tensorrt library.") -option(ENABLE_VISION "if to enable vision models usage." OFF) -option(ENABLE_VISION_VISUALIZE "if to enable visualize vision model result toolbox." ON) +option(ENABLE_PADDLE_FRONTEND "Whether to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON) +option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu" OFF) +option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF) +option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF) +option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF) +option(CUDA_DIRECTORY "If build tensorrt backend, need to define path of cuda library.") +option(TRT_DIRECTORY "If build tensorrt backend, need to define path of tensorrt library.") +option(ENABLE_VISION "Whether to enable vision models usage." OFF) +option(ENABLE_VISION_VISUALIZE "Whether to enable visualize vision model result toolbox." ON) +option(ENABLE_TEXT "Whether to enable text models usage." OFF) # Please don't open this flag now, some bugs exists. -option(ENABLE_OPENCV_CUDA "if to enable opencv with cuda, this will allow process image with GPU." OFF) -option(ENABLE_DEBUG "if to enable print debug information, this may reduce performance." OFF) +option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF) +option(ENABLE_DEBUG "Whether to enable print debug information, this may reduce performance." OFF) # Whether to build fastdeply with vision/text/... examples, only for testings. option(WITH_VISION_EXAMPLES "Whether to build fastdeply with vision examples" OFF) +option(WITH_TEXT_EXAMPLES "Whether to build fastdeply with text examples" OFF) # Check for 32bit system if(WIN32) @@ -98,14 +100,21 @@ if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) set(ENABLE_VISION_VISUALIZE ON CACHE BOOL "force to enable visualize vision model result toolbox" FORCE) endif() +if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) + # ENABLE_TEXT must be ON if enable text examples. 
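+  # Building the text examples without the text API would fail, so the flag is forced
+  # below. An illustrative configure line (the options shown are examples only):
+  #   cmake .. -DENABLE_ORT_BACKEND=ON -DENABLE_TEXT=ON -DWITH_TEXT_EXAMPLES=ON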
+ message(STATUS "Found WITH_TEXT_EXAMPLES ON, so, force ENABLE_TEXT ON") + set(ENABLE_TEXT ON CACHE BOOL "force to enable text models usage" FORCE) +endif() + add_definitions(-DFASTDEPLOY_LIB) file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc) file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc) file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc) file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp) file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc) +file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc) file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc) -list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS}) +list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS}) set(DEPEND_LIBS "") @@ -113,6 +122,7 @@ file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION) string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION) set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs) +include(external/eigen.cmake) if(ENABLE_PADDLE_FRONTEND) add_definitions(-DENABLE_PADDLE_FRONTEND) include(${PROJECT_SOURCE_DIR}/external/paddle2onnx.cmake) @@ -207,6 +217,12 @@ else() endif() endif() +if(ENABLE_TEXT) + add_definitions(-DENABLE_TEXT) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TEXT_SRCS}) + include(external/faster_tokenizer.cmake) +endif() + configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h) configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY) @@ -249,6 +265,15 @@ if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) add_subdirectory(examples) endif() +if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) + add_definitions(-DWITH_TEXT_EXAMPLES) + set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin) + # Avoid to add_subdirectory repeatedly + if (NOT WITH_VISION_EXAMPLES) + add_subdirectory(examples) + endif() +endif() + include(external/summary.cmake) fastdeploy_summary() if(WIN32) @@ -307,6 +332,12 @@ if(BUILD_FASTDEPLOY_PYTHON) file(GLOB_RECURSE VISION_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*_pybind.cc) list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${VISION_PYBIND_SRCS}) endif() + + if (NOT ENABLE_TEXT) + file(GLOB_RECURSE TEXT_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*_pybind.cc) + list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${TEXT_PYBIND_SRCS}) + endif() + add_library(${PY_LIBRARY_NAME} MODULE ${DEPLOY_PYBIND_SRCS}) redefine_file_macro(${PY_LIBRARY_NAME}) set_target_properties(${PY_LIBRARY_NAME} PROPERTIES PREFIX "") diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 4f4643fdfba..50910f0737c 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -7,6 +7,7 @@ set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@) set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@) set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@) set(ENABLE_VISION 
@ENABLE_VISION@) +set(ENABLE_TEXT @ENABLE_TEXT@) set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@) set(LIBRARY_NAME @LIBRARY_NAME@) @@ -87,6 +88,10 @@ if(ENABLE_VISION) endif() endif() +if (ENABLE_TEXT) +# Add dependency libs later +endif() + if(ENABLE_PADDLE_FRONTEND) find_library(PADDLE2ONNX_LIB paddle2onnx ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle2onnx/lib) list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB}) @@ -109,6 +114,7 @@ if(ENABLE_PADDLE_BACKEND) endif() message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") message(STATUS " ENABLE_VISION : ${ENABLE_VISION}") +message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}") if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4.0") diff --git a/csrcs/fastdeploy/core/fd_tensor.cc b/csrcs/fastdeploy/core/fd_tensor.cc index dbefbd9ecca..c278763cabe 100644 --- a/csrcs/fastdeploy/core/fd_tensor.cc +++ b/csrcs/fastdeploy/core/fd_tensor.cc @@ -50,6 +50,13 @@ void* FDTensor::Data() { return data.data(); } +const void* FDTensor::Data() const { + if (external_data_ptr != nullptr) { + return external_data_ptr; + } + return data.data(); +} + void FDTensor::SetExternalData(const std::vector& new_shape, const FDDataType& data_type, void* data_buffer) { dtype = data_type; diff --git a/csrcs/fastdeploy/core/fd_tensor.h b/csrcs/fastdeploy/core/fd_tensor.h index a00ff87fdfc..14c5a1142be 100644 --- a/csrcs/fastdeploy/core/fd_tensor.h +++ b/csrcs/fastdeploy/core/fd_tensor.h @@ -54,6 +54,8 @@ struct FASTDEPLOY_DECL FDTensor { // will copy to cpu store in `temporary_cpu_buffer` void* Data(); + const void* Data() const; + // Set user memory buffer for Tensor, the memory is managed by // the user it self, but the Tensor will share the memory with user // So take care with the user buffer @@ -81,4 +83,4 @@ struct FASTDEPLOY_DECL FDTensor { explicit FDTensor(const std::string& tensor_name); }; -} // namespace fastdeploy +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/core/fd_type.cc b/csrcs/fastdeploy/core/fd_type.cc index 8d624cdf270..ae70fa6e536 100644 --- a/csrcs/fastdeploy/core/fd_type.cc +++ b/csrcs/fastdeploy/core/fd_type.cc @@ -93,4 +93,31 @@ std::string Str(const FDDataType& fdt) { return out; } +template +const FDDataType TypeToDataType::dtype = UNKNOWN1; + +template <> +const FDDataType TypeToDataType::dtype = BOOL; + +template <> +const FDDataType TypeToDataType::dtype = INT16; + +template <> +const FDDataType TypeToDataType::dtype = INT32; + +template <> +const FDDataType TypeToDataType::dtype = INT64; + +template <> +const FDDataType TypeToDataType::dtype = FP32; + +template <> +const FDDataType TypeToDataType::dtype = FP64; + +template <> +const FDDataType TypeToDataType::dtype = UINT8; + +template <> +const FDDataType TypeToDataType::dtype = INT8; + } // namespace fastdeploy diff --git a/csrcs/fastdeploy/core/fd_type.h b/csrcs/fastdeploy/core/fd_type.h index 325551dfb3a..50b00dca893 100644 --- a/csrcs/fastdeploy/core/fd_type.h +++ b/csrcs/fastdeploy/core/fd_type.h @@ -54,4 +54,10 @@ enum FASTDEPLOY_DECL FDDataType { FASTDEPLOY_DECL std::string Str(const FDDataType& fdt); FASTDEPLOY_DECL int32_t FDDataTypeSize(const FDDataType& data_dtype); + +template +struct FASTDEPLOY_DECL TypeToDataType { + static const FDDataType dtype; +}; + } // namespace fastdeploy diff --git a/csrcs/fastdeploy/text.h b/csrcs/fastdeploy/text.h new file mode 100644 index 00000000000..184f0f4f916 --- /dev/null +++ b/csrcs/fastdeploy/text.h @@ -0,0 +1,19 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "fastdeploy/core/config.h" +#ifdef ENABLE_TEXT +#include "fastdeploy/text/text_model.h" +#endif diff --git a/csrcs/fastdeploy/text/common/option.h b/csrcs/fastdeploy/text/common/option.h new file mode 100644 index 00000000000..a795fd06693 --- /dev/null +++ b/csrcs/fastdeploy/text/common/option.h @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace text { + +struct FASTDEPLOY_DECL TextPreprocessOption {}; +struct FASTDEPLOY_DECL TextPostprocessOption {}; +struct FASTDEPLOY_DECL PredictionOption {}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/common/result.cc b/csrcs/fastdeploy/text/common/result.cc new file mode 100644 index 00000000000..cb7efbb73e9 --- /dev/null +++ b/csrcs/fastdeploy/text/common/result.cc @@ -0,0 +1,18 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "fastdeploy/text/common/result.h" + +namespace fastdeploy { +namespace text {} // namespace text +} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/common/result.h b/csrcs/fastdeploy/text/common/result.h new file mode 100644 index 00000000000..4a6f716a38a --- /dev/null +++ b/csrcs/fastdeploy/text/common/result.h @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace text { + +struct FASTDEPLOY_DECL Result {}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/postprocessor/postprocessor.cc b/csrcs/fastdeploy/text/postprocessor/postprocessor.cc new file mode 100644 index 00000000000..e8f71774392 --- /dev/null +++ b/csrcs/fastdeploy/text/postprocessor/postprocessor.cc @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/text/postprocessor/postprocessor.h" + +namespace fastdeploy { +namespace text { + +bool Postprocessor::Decode(const std::vector& model_result, + Result* decoded_result) const { + return true; +} + +bool Postprocessor::DecodeBatch(const std::vector& model_result, + Result* decoded_result) const { + return true; +} + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/postprocessor/postprocessor.h b/csrcs/fastdeploy/text/postprocessor/postprocessor.h new file mode 100644 index 00000000000..76f6a709000 --- /dev/null +++ b/csrcs/fastdeploy/text/postprocessor/postprocessor.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/text/common/result.h" +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace text { + +class Postprocessor { + public: + virtual bool Decode(const std::vector& model_result, + Result* decoded_result) const; + virtual bool DecodeBatch(const std::vector& model_result, + Result* decoded_result) const; +}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/preprocessor/preprocessor.cc b/csrcs/fastdeploy/text/preprocessor/preprocessor.cc new file mode 100644 index 00000000000..2e2715f61c2 --- /dev/null +++ b/csrcs/fastdeploy/text/preprocessor/preprocessor.cc @@ -0,0 +1,32 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/text/preprocessor/preprocessor.h" + +namespace fastdeploy { +namespace text { + +bool Preprocessor::Encode(const std::string& raw_text, + std::vector* encoded_tensor) const { + return true; +} + +bool Preprocessor::EncodeBatch(const std::vector& raw_texts, + std::vector* encoded_tensor) const { + return true; +} + +} // namespace text +} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/preprocessor/preprocessor.h b/csrcs/fastdeploy/text/preprocessor/preprocessor.h new file mode 100644 index 00000000000..79996709389 --- /dev/null +++ b/csrcs/fastdeploy/text/preprocessor/preprocessor.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace text { + +class Preprocessor { + public: + virtual bool Encode(const std::string& raw_text, + std::vector* encoded_tensor) const; + virtual bool EncodeBatch(const std::vector& raw_texts, + std::vector* encoded_tensor) const; +}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/text_model.cc b/csrcs/fastdeploy/text/text_model.cc new file mode 100644 index 00000000000..d5a40c0e56a --- /dev/null +++ b/csrcs/fastdeploy/text/text_model.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
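+//
+// TextModel glues the text pipeline together: Preprocessor::Encode/EncodeBatch
+// turns raw text into input FDTensors, the inherited Infer() call runs the
+// selected runtime backend, and Postprocessor::Decode/DecodeBatch converts the
+// output FDTensors into a Result. If any stage fails, Predict/PredictBatch logs
+// an FDERROR and returns false.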
+ +#include "fastdeploy/text/text_model.h" +#include "fastdeploy/text/common/option.h" +#include "fastdeploy/text/common/result.h" +#include "fastdeploy/text/postprocessor/postprocessor.h" +#include "fastdeploy/text/preprocessor/preprocessor.h" + +namespace fastdeploy { +namespace text { + +bool TextModel::Predict(const std::string& raw_text, Result* result, + const PredictionOption& option) { + // Preprocess + std::vector input_tensor; + std::vector output_tensor; + if (!preprocessor_->Encode(raw_text, &input_tensor)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + + // Inference Runtime + if (!Infer(input_tensor, &output_tensor)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + + // Postprocess + if (postprocessor_->Decode(output_tensor, result)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +bool TextModel::PredictBatch(const std::vector& raw_text_array, + Result* results, const PredictionOption& option) { + // Preprocess + std::vector input_tensor; + std::vector output_tensor; + if (!preprocessor_->EncodeBatch(raw_text_array, &input_tensor)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + + // Inference Runtime + if (!Infer(input_tensor, &output_tensor)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + + // Postprocess + if (postprocessor_->DecodeBatch(output_tensor, results)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace text +} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/text_model.h b/csrcs/fastdeploy/text/text_model.h new file mode 100644 index 00000000000..b7fbd592972 --- /dev/null +++ b/csrcs/fastdeploy/text/text_model.h @@ -0,0 +1,51 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/utils/unique_ptr.h" + +namespace fastdeploy { +namespace text { + +class Preprocessor; +class Postprocessor; +class Result; +class PredictionOption; + +class FASTDEPLOY_DECL TextModel : public FastDeployModel { + public: + virtual std::string ModelName() const { return "TextModel"; } + virtual bool Predict(const std::string& raw_text, Result* result, + const PredictionOption& option); + virtual bool PredictBatch(const std::vector& raw_text_array, + Result* result, const PredictionOption& option); + template + void SetPreprocessor(Args&&... args) { + preprocessor_ = utils::make_unique(std::forward(args)...); + } + template + void SetPostprocessor(Args&&... 
args) { + postprocessor_ = utils::make_unique(std::forward(args)...); + } + + private: + std::unique_ptr preprocessor_; + std::unique_ptr postprocessor_; +}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/text_pybind.cc b/csrcs/fastdeploy/text/text_pybind.cc new file mode 100644 index 00000000000..564892f1679 --- /dev/null +++ b/csrcs/fastdeploy/text/text_pybind.cc @@ -0,0 +1,13 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. \ No newline at end of file diff --git a/csrcs/fastdeploy/utils/unique_ptr.h b/csrcs/fastdeploy/utils/unique_ptr.h new file mode 100644 index 00000000000..2f24ef70c6b --- /dev/null +++ b/csrcs/fastdeploy/utils/unique_ptr.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace fastdeploy { +namespace utils { +// Trait to select overloads and return types for MakeUnique. +template +struct MakeUniqueResult { + using scalar = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using array = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using invalid = void; +}; + +// MakeUnique(...) is an early implementation of C++14 std::make_unique. +// It is designed to be 100% compatible with std::make_unique so that the +// eventual switchover will be a simple renaming operation. +template +typename MakeUniqueResult::scalar make_unique(Args &&... args) { // NOLINT + return std::unique_ptr( + new T(std::forward(args)...)); // NOLINT(build/c++11) +} + +// Overload for array of unknown bound. +// The allocation of arrays needs to use the array form of new, +// and cannot take element constructor arguments. +template +typename MakeUniqueResult::array make_unique(size_t n) { + return std::unique_ptr(new typename std::remove_extent::type[n]()); +} + +// Reject arrays of known bound. +template +typename MakeUniqueResult::invalid make_unique(Args &&... 
/* args */) = + delete; // NOLINT + +} // namespace utils +} // namespace fastdeploy diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 31ca40af3c1..770bf44da2f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -37,4 +37,14 @@ if(WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/vision) endforeach() endif() +# text examples +if(WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/text) + message(STATUS "") + message(STATUS "*************FastDeploy Examples Summary**********") + file(GLOB ALL_VISION_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/text/*.cc) + foreach(_CC_FILE ${ALL_VISION_EXAMPLE_SRCS}) + add_fastdeploy_executable(text ${_CC_FILE}) + endforeach() +endif() + # other examples ... diff --git a/examples/text/compute.h b/examples/text/compute.h new file mode 100644 index 00000000000..b279473b75c --- /dev/null +++ b/examples/text/compute.h @@ -0,0 +1,270 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "fastdeploy/core/fd_tensor.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace fastdeploy { +// EigenDim converts shape into Eigen::DSizes. +template +struct EigenDim { + using Type = Eigen::DSizes; + + static Type From(const std::vector& dims) { + Type ret; + for (int64_t d = 0; d < dims.size(); d++) { + ret[d] = dims[d]; + } + return ret; + } +}; + +// Interpret FDTensor as EigenTensor and EigenConstTensor. +template +struct EigenTensor { + using Type = Eigen::TensorMap>; + + using ConstType = + Eigen::TensorMap>; + + static Type From(FDTensor& tensor, + const std::vector& dims) { // NOLINT + return Type(reinterpret_cast(tensor.data.data()), + EigenDim::From(dims)); + } + + static Type From(FDTensor& tensor) { // NOLINT + return From(tensor, tensor.shape); + } // NOLINT + + static ConstType From(const FDTensor& tensor, + const std::vector& dims) { + return ConstType(reinterpret_cast(tensor.data.data()), + EigenDim::From(dims)); + } + + static ConstType From(const FDTensor& tensor) { + return From(tensor, tensor.shape); + } +}; + +template +struct EigenScalar { + // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. + using Type = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + using ConstType = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + + static Type From(FDTensor& tensor) { + return Type(reinterpret_cast(tensor.data.data())); + } // NOLINT + + static ConstType From(const FDTensor& tensor) { + return ConstType(reinterpret_cast(tensor.data.data())); + } +}; + +template +struct EigenVector : public EigenTensor { + // Flatten reshapes a Tensor into an EigenVector. 
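+  // The map produced here is a rank-1 view of length tensor.Numel() over the
+  // FDTensor's existing buffer, so flattening never copies data.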
+ static typename EigenVector::Type Flatten(FDTensor& tensor) { // NOLINT + return EigenVector::From(tensor, {tensor.Numel()}); + } + + static typename EigenVector::ConstType Flatten( + const FDTensor& tensor) { // NOLINT + return EigenVector::From(tensor, {tensor.Numel()}); + } +}; + +template +void ReduceFunctor(const Eigen::DefaultDevice& dev, const FDTensor& input, + FDTensor* output, const std::vector& dims, + bool keep_dim = true) { + auto x = EigenTensor::From(input); + auto x_rank = static_cast(x.dimensions().size()); + auto reduce_dim = Eigen::array(); + std::vector dims_ref = dims; + std::vector out_dims(input.shape.size()); + std::copy(input.shape.begin(), input.shape.end(), out_dims.begin()); + for (size_t i = 0; i < dims_ref.size(); ++i) { + if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i]; + out_dims[dims_ref[i]] = 1; + reduce_dim[i] = dims_ref[i]; + } + output->Allocate(out_dims, TypeToDataType::dtype); + if (keep_dim && x_rank > 1) { + const int kDelFlag = -2; + auto dims_vector = out_dims; + for (size_t i = 0; i < dims_ref.size(); ++i) { + dims_vector[dims_ref[i]] = kDelFlag; + } + dims_vector.erase(remove(dims_vector.begin(), dims_vector.end(), kDelFlag), + dims_vector.end()); + out_dims = dims_vector; + } + Functor functor; + + if (D == 1) { + auto out = EigenScalar::From(*output); + functor(dev, &x, &out, reduce_dim); + } else { + dims_ref.resize(out_dims.size()); + std::copy(out_dims.begin(), out_dims.end(), dims_ref.begin()); + for (int i = 0; i < dims_ref.size(); ++i) { + std::cerr << dims_ref[i] << ", "; + } + std::cerr << std::endl; + auto out = EigenTensor::From(*output, dims_ref); + functor(dev, &x, &out, reduce_dim); + } +} + +struct MaxFunctor { + template + void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { + y->device(dev) = x->maximum(dim); + } +}; + +struct SumFunctor { + template + void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { + y->device(dev) = x->sum(dim); + } +}; + +inline void GetBroadcastDimsArrays(const std::vector& x_dims, + const std::vector& y_dims, + int* x_dims_array, int* y_dims_array, + int* out_dims_array, const int max_dim, + const int axis) { + if (x_dims.size() > y_dims.size()) { + std::fill(y_dims_array, y_dims_array + axis, 1); + if (axis + y_dims.size() < max_dim) { + std::fill(y_dims_array + axis + y_dims.size(), y_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), x_dims_array); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), + y_dims_array + axis); + } else { + std::fill(x_dims_array, x_dims_array + axis, 1); + if (axis + x_dims.size() < max_dim) { + std::fill(x_dims_array + axis + x_dims.size(), x_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), + x_dims_array + axis); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), y_dims_array); + } + + for (int i = 0; i < max_dim; i++) { + if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) || + (x_dims_array[i] == 1 && y_dims_array[i] == 1)) { + out_dims_array[i] = (std::max)(x_dims_array[i], y_dims_array[i]); + } else { + out_dims_array[i] = -1; + } + } +} + +inline int GetElementwiseIndex(const int* x_dims_array, const int max_dim, + const int* index_array) { + int index_ = 0; + for (int i = 0; i < max_dim; i++) { + if (x_dims_array[i] > 1) { + index_ = index_ * x_dims_array[i] + index_array[i]; + } + } + return index_; +} + +inline void UpdateElementwiseIndexArray(const int* out_dims_array, + const int max_dim, int* index_array) { + 
for (int i = max_dim - 1; i >= 0; --i) { + ++index_array[i]; + if (index_array[i] >= out_dims_array[i]) { + index_array[i] -= out_dims_array[i]; + } else { + break; + } + } +} + +template +void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y, + FDTensor* z, Functor func, int axis, + const bool is_xsize_larger = true) { + std::vector x_dims = x.shape; + std::vector y_dims = y.shape; + int max_dim = (std::max)(x_dims.size(), y_dims.size()); + int diff = x_dims.size() - y_dims.size(); + axis = (axis == -1 ? std::abs(diff) : axis); + std::vector x_dims_array(max_dim); + std::vector y_dims_array(max_dim); + std::vector out_dims_array(max_dim); + GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(), + y_dims_array.data(), out_dims_array.data(), max_dim, + axis); + + const T* x_data = reinterpret_cast(x.Data()); + const T* y_data = reinterpret_cast(y.Data()); + + z->Allocate(out_dims_array, TypeToDataType::dtype); + OutType* out_data = reinterpret_cast(z->MutableData()); + + const int out_size = + std::accumulate(out_dims_array.data(), out_dims_array.data() + max_dim, 1, + std::multiplies()); + int x_index, y_index; + std::vector index_array(max_dim, 0); + for (int out_index = 0; out_index < out_size; ++out_index) { + x_index = + GetElementwiseIndex(x_dims_array.data(), max_dim, index_array.data()); + y_index = + GetElementwiseIndex(y_dims_array.data(), max_dim, index_array.data()); + if (is_xsize_larger) { + out_data[out_index] = func(x_data[x_index], y_data[y_index]); + } else { + out_data[out_index] = func(y_data[y_index], x_data[x_index]); + } + + UpdateElementwiseIndexArray(out_dims_array.data(), max_dim, + index_array.data()); + } +} + +template +struct AddFunctor { + T operator()(const T& lhs, const T& rhs) { return lhs + rhs; } +}; + +template +struct SubFunctor { + T operator()(const T& lhs, const T& rhs) { return lhs - rhs; } +}; + +template +struct DivFunctor { + T operator()(const T& lhs, const T& rhs) { return lhs / rhs; } +}; + +} // namespace fastdeploy diff --git a/examples/text/ernie_tokencls.cc b/examples/text/ernie_tokencls.cc new file mode 100644 index 00000000000..4df1f570556 --- /dev/null +++ b/examples/text/ernie_tokencls.cc @@ -0,0 +1,232 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
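+// End-to-end demo of a joint domain/intent classification + slot-filling ERNIE
+// model: the batch of Chinese queries is tokenized with ErnieFasterTokenizer
+// ("ernie_vocab.txt"), input_ids/token_type_ids are fed to a fastdeploy::Runtime
+// loaded from "nano_static/model.pdmodel" / "nano_static/model.pdiparams", and
+// the outputs are post-processed with Softmax/Max (domain and intent scores)
+// plus ViterbiDecode over the transition matrix in "joint_transition.txt"
+// (slot tags). All three files are expected in the current working directory.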
+#include +#include + +#include "fastdeploy/text.h" +#include "tokenizers/ernie_faster_tokenizer.h" + +using namespace paddlenlp; + +void LoadTransitionFromFile(const std::string& file, + std::vector* transitions, int* num_tags) { + std::ifstream fin(file); + std::string curr_transition; + float transition; + int i = 0; + while (fin) { + std::getline(fin, curr_transition); + std::istringstream iss(curr_transition); + while (iss) { + iss >> transition; + transitions->push_back(transition); + } + if (curr_transition != "") { + ++i; + } + } + *num_tags = i; +} + +// Only useful for axis = -1 +template +void Softmax(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) { + auto softmax_func = [](const T* score_vec, T* softmax_vec, int label_num) { + double score_max = *(std::max_element(score_vec, score_vec + label_num)); + double e_sum = 0; + for (int j = 0; j < label_num; j++) { + softmax_vec[j] = std::exp(score_vec[j] - score_max); + e_sum += softmax_vec[j]; + } + for (int k = 0; k < label_num; k++) { + softmax_vec[k] /= e_sum; + } + }; + + std::vector output_shape; + for (int i = 0; i < input.shape.size(); ++i) { + output_shape.push_back(input.shape[i]); + } + output->Allocate(output_shape, input.dtype); + int label_num = output_shape.back(); + int batch_size = input.Numel() / label_num; + int offset = 0; + const T* input_ptr = reinterpret_cast(input.Data()); + T* output_ptr = reinterpret_cast(output->Data()); + for (int i = 0; i < batch_size; ++i) { + softmax_func(input_ptr + offset, output_ptr + offset, label_num); + offset += label_num; + } +} + +// Only useful for axis = -1 +template +void Max(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) { + std::vector output_shape; + for (int i = 0; i < input.shape.size() - 1; ++i) { + output_shape.push_back(input.shape[i]); + } + output_shape.push_back(1); + output->Allocate(output_shape, input.dtype); + int batch_size = output->Numel(); + int label_num = input.shape.back(); + int offset = 0; + const T* input_ptr = reinterpret_cast(input.Data()); + T* output_ptr = reinterpret_cast(output->Data()); + for (int i = 0; i < batch_size; ++i) { + output_ptr[i] = + *(std::max_element(input_ptr + offset, input_ptr + offset + label_num)); + offset += label_num; + } +} + +template +void ViterbiDecode(const fastdeploy::FDTensor& slot_logits, + const fastdeploy::FDTensor& trans, + fastdeploy::FDTensor* best_path) { + int batch_size = slot_logits.shape[0]; + int seq_len = slot_logits.shape[1]; + int num_tags = slot_logits.shape[2]; + best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64); + + const T* slot_logits_ptr = reinterpret_cast(slot_logits.Data()); + const T* trans_ptr = reinterpret_cast(trans.Data()); + int64_t* best_path_ptr = reinterpret_cast(best_path->Data()); + std::vector scores(num_tags); + std::copy(slot_logits_ptr, slot_logits_ptr + num_tags, scores.begin()); + std::vector> M(num_tags, std::vector(num_tags)); + for (int b = 0; b < batch_size; ++b) { + std::vector> paths; + const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags; + int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len; + for (int t = 1; t < seq_len; t++) { + for (size_t i = 0; i < num_tags; i++) { + for (size_t j = 0; j < num_tags; j++) { + auto trans_idx = i * num_tags * num_tags + j * num_tags; + auto slot_logit_idx = t * num_tags + j; + M[i][j] = scores[i] + trans_ptr[trans_idx] + + curr_slot_logits_ptr[slot_logit_idx]; + } + } + std::vector idxs; + for (size_t i = 0; i < num_tags; i++) { + T max = 0.0f; + 
int idx = 0; + for (size_t j = 0; j < num_tags; j++) { + if (M[j][i] > max) { + max = M[j][i]; + idx = j; + } + } + scores[i] = max; + idxs.push_back(idx); + } + paths.push_back(idxs); + } + int scores_max_index = 0; + float scores_max = 0.0f; + for (size_t i = 0; i < scores.size(); i++) { + if (scores[i] > scores_max) { + scores_max = scores[i]; + scores_max_index = i; + } + } + curr_best_path_ptr[seq_len - 1] = scores_max_index; + for (int i = seq_len - 2; i >= 0; i--) { + int index = curr_best_path_ptr[i + 1]; + curr_best_path_ptr[i] = paths[i][index]; + } + } +} + +int main() { + // 1. Define a ernie faster tokenizer + faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer( + "ernie_vocab.txt"); + std::vector strings_list = { + "导航去科技园二号楼", "屏幕亮度为我减小一点吧"}; + std::vector encodings; + tokenizer.EncodeBatchStrings(strings_list, &encodings); + size_t batch_size = strings_list.size(); + size_t seq_len = encodings[0].GetLen(); + for (auto&& encoding : encodings) { + std::cout << encoding.DebugString() << std::endl; + } + // 2. Initialize runtime + fastdeploy::RuntimeOption runtime_option; + runtime_option.SetModelPath("nano_static/model.pdmodel", + "nano_static/model.pdiparams"); + fastdeploy::Runtime runtime; + runtime.Init(runtime_option); + + // 3. Construct input vector + // 3.1 Convert encodings to input_ids, token_type_ids + std::vector input_ids, token_type_ids; + for (int i = 0; i < encodings.size(); ++i) { + auto&& curr_input_ids = encodings[i].GetIds(); + auto&& curr_type_ids = encodings[i].GetTypeIds(); + input_ids.insert(input_ids.end(), curr_input_ids.begin(), + curr_input_ids.end()); + token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(), + curr_type_ids.end()); + } + // 3.2 Set data to input vector + std::vector inputs(runtime.NumInputs()); + void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()}; + for (int i = 0; i < runtime.NumInputs(); ++i) { + inputs[i].SetExternalData({batch_size, seq_len}, + fastdeploy::FDDataType::INT64, inputs_ptrs[i]); + inputs[i].name = runtime.GetInputInfo(i).name; + } + + // 4. Infer + std::vector outputs(runtime.NumOutputs()); + runtime.Infer(inputs, &outputs); + + // 5. Postprocess + fastdeploy::FDTensor domain_probs, intent_probs; + Softmax(outputs[0], &domain_probs); + Softmax(outputs[1], &intent_probs); + + fastdeploy::FDTensor domain_max_probs, intent_max_probs; + Max(domain_probs, &domain_max_probs); + Max(intent_probs, &intent_max_probs); + + std::vector transition; + int num_tags; + LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags); + fastdeploy::FDTensor trans; + trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32, + transition.data()); + + fastdeploy::FDTensor best_path; + ViterbiDecode(outputs[2], trans, &best_path); + // 6. Print result + domain_max_probs.PrintInfo(); + intent_max_probs.PrintInfo(); + + batch_size = best_path.shape[0]; + seq_len = best_path.shape[1]; + const int64_t* best_path_ptr = + reinterpret_cast(best_path.Data()); + for (int i = 0; i < batch_size; ++i) { + std::cout << "best_path[" << i << "] = "; + for (int j = 0; j < seq_len; ++j) { + std::cout << best_path_ptr[i * seq_len + j] << ", "; + } + std::cout << std::endl; + } + best_path.PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/external/eigen.cmake b/external/eigen.cmake new file mode 100644 index 00000000000..2248ee0fdbf --- /dev/null +++ b/external/eigen.cmake @@ -0,0 +1,66 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +# update eigen to the commit id f612df27 on 03/16/2021 +set(EIGEN_PREFIX_DIR ${THIRD_PARTY_PATH}/eigen3) +set(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3/src/extern_eigen3) +set(EIGEN_REPOSITORY https://gitlab.com/libeigen/eigen.git) +set(EIGEN_TAG f612df273689a19d25b45ca4f8269463207c4fee) + +if(WIN32) + add_definitions(-DEIGEN_STRONG_INLINE=inline) +elseif(LINUX) + if(WITH_ROCM) + # For HIPCC Eigen::internal::device::numeric_limits is not EIGEN_DEVICE_FUNC + # which will cause compiler error of using __host__ funciont + # in __host__ __device__ + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src) + file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/Eigen/src/Core/util/Meta.h + native_dst) + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/TensorReductionGpu.h + native_src1) + file( + TO_NATIVE_PATH + ${EIGEN_SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h + native_dst1) + set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst} && cp ${native_src1} + ${native_dst1}) + endif() +endif() + +set(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}) +include_directories(${EIGEN_INCLUDE_DIR}) + +ExternalProject_Add( + extern_eigen3 + GIT_REPOSITORY ${EIGEN_REPOSITORY} + GIT_TAG ${EIGEN_TAG} + PREFIX ${EIGEN_PREFIX_DIR} + UPDATE_COMMAND "" + PATCH_COMMAND ${EIGEN_PATCH_COMMAND} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") + +add_library(eigen3 INTERFACE) + +add_dependencies(eigen3 extern_eigen3) + +# sw not support thread_local semantic +if(WITH_SW) + add_definitions(-DEIGEN_AVOID_THREAD_LOCAL) +endif() diff --git a/external/faster_tokenizer.cmake b/external/faster_tokenizer.cmake new file mode 100644 index 00000000000..fabc33db581 --- /dev/null +++ b/external/faster_tokenizer.cmake @@ -0,0 +1,79 @@ + + +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(FASTERTOKENIZER_PROJECT "extern_faster_tokenizer") +set(FASTERTOKENIZER_PREFIX_DIR ${THIRD_PARTY_PATH}/faster_tokenizer) +set(FASTERTOKENIZER_SOURCE_DIR + ${THIRD_PARTY_PATH}/faster_tokenizer/src/${FASTERTOKENIZER_PROJECT}) +set(FASTERTOKENIZER_INSTALL_DIR ${THIRD_PARTY_PATH}/install/faster_tokenizer) +set(FASTERTOKENIZER_INC_DIR + "${FASTERTOKENIZER_INSTALL_DIR}/include" + "${FASTERTOKENIZER_INSTALL_DIR}/third_party/include" + CACHE PATH "faster_tokenizer include directory." 
FORCE) +set(FASTERTOKENIZER_LIB_DIR + "${FASTERTOKENIZER_INSTALL_DIR}/lib/" + CACHE PATH "faster_tokenizer lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${FASTERTOKENIZER_LIB_DIR}") + +include_directories(${FASTERTOKENIZER_INC_DIR}) + +# Set lib path +if(WIN32) +elseif(APPLE) +# Not support apple so far. +else() + +set(FASTERTOKENIZER_COMPILE_LIB "${FASTERTOKENIZER_LIB_DIR}/libcore_tokenizers.so" + CACHE FILEPATH "faster_tokenizer compile library." FORCE) +message("FASTERTOKENIZER_COMPILE_LIB = ${FASTERTOKENIZER_COMPILE_LIB}") +set(ICUDT_LIB "") +set(ICUUC_LIB "") +endif(WIN32) + +set(FASTERTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/faster_tokenizer/") +set(FASTERTOKENIZER_VERSION "dev") + +# Set download url +if(WIN32) +elseif(APPLE) +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(FASTERTOKENIZER_FILE "faster_tokenizer-linux-aarch64-${FASTERTOKENIZER_VERSION}.tgz") + else() + set(FASTERTOKENIZER_FILE "faster_tokenizer-linux-x64-${FASTERTOKENIZER_VERSION}.tgz") + endif() +endif() +set(FASTERTOKENIZER_URL "${FASTERTOKENIZER_URL_BASE}${FASTERTOKENIZER_FILE}") + +ExternalProject_Add( + ${FASTERTOKENIZER_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${FASTERTOKENIZER_URL} + PREFIX ${FASTERTOKENIZER_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${FASTERTOKENIZER_SOURCE_DIR} ${FASTERTOKENIZER_INSTALL_DIR} + BUILD_BYPRODUCTS ${FASTERTOKENIZER_COMPILE_LIB}) + +add_library(faster_tokenizer STATIC IMPORTED GLOBAL) +set_property(TARGET faster_tokenizer PROPERTY IMPORTED_LOCATION ${FASTERTOKENIZER_COMPILE_LIB}) +add_dependencies(faster_tokenizer ${FASTERTOKENIZER_PROJECT}) +list(APPEND DEPEND_LIBS faster_tokenizer) \ No newline at end of file diff --git a/external/summary.cmake b/external/summary.cmake index bd5e7939028..754af9c3ecb 100644 --- a/external/summary.cmake +++ b/external/summary.cmake @@ -45,6 +45,7 @@ function(fastdeploy_summary) message(STATUS " TRT_DRECTORY : ${TRT_DIRECTORY}") endif() message(STATUS " ENABLE_VISION : ${ENABLE_VISION}") + message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}") message(STATUS " ENABLE_DEBUG : ${ENABLE_DEBUG}") message(STATUS " ENABLE_VISION_VISUALIZE : ${ENABLE_VISION_VISUALIZE}") endfunction() diff --git a/fastdeploy/text/__init__.py b/fastdeploy/text/__init__.py new file mode 100644 index 00000000000..7d175762cf9 --- /dev/null +++ b/fastdeploy/text/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import diff --git a/model_zoo/text/ernie-3.0/README.md b/model_zoo/text/ernie-3.0/README.md new file mode 100755 index 00000000000..c601485795e --- /dev/null +++ b/model_zoo/text/ernie-3.0/README.md @@ -0,0 +1,238 @@ +# ERNIE 3.0 Python部署指南 +本文介绍 ERNIE 3.0 Python 端的部署,包括部署环境的准备,序列标注和分类两大场景下的使用示例。 +- [ERNIE 3.0 Python 部署指南](#ERNIE3.0Python部署指南) + - [1. 
环境准备](#1-环境准备) + - [1.1 CPU 端](#11-CPU端) + - [1.2 GPU 端](#12-GPU端) + - [2. 序列标注模型推理](#2-序列标注模型推理) + - [2.1 模型获取](#21-模型获取) + - [2.2 CPU 端推理样例](#22-CPU端推理样例) + - [2.3 GPU 端推理样例](#23-GPU端推理样例) + - [3. 分类模型推理](#3-分类模型推理) + - [3.1 模型获取](#31-模型获取) + - [3.2 CPU 端推理样例](#32-CPU端推理样例) + - [3.3 GPU 端推理样例](#33-GPU端推理样例) +## 1. 环境准备 +ERNIE 3.0 的部署分为 CPU 和 GPU 两种情况,请根据你的部署环境安装对应的依赖。 +### 1.1 CPU端 +CPU 端的部署请使用如下命令安装所需依赖 +``` +pip install -r requirements_cpu.txt +``` +### 1.2 GPU端 +为了在 GPU 上获得最佳的推理性能和稳定性,请先确保机器已正确安装 NVIDIA 相关驱动和基础软件,确保 CUDA >= 11.2,CuDNN >= 8.2,并使用以下命令安装所需依赖 +``` +pip install -r requirements_gpu.txt +``` +如需使用半精度(FP16)或量化(INT8)部署,请确保GPU设备的 CUDA 计算能力 (CUDA Compute Capability) 大于 7.0,典型的设备包括 V100、T4、A10、A100、GTX 20 系列和 30 系列显卡等。同时 INT8 推理需要安装 TensorRT 以及包含 TensorRT 预测库的 PaddlePaddle。 +更多关于 CUDA Compute Capability 和精度支持情况请参考 NVIDIA 文档:[GPU硬件与支持精度对照表](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/support-matrix/index.html#hardware-precision-matrix) + +1. TensorRT 安装请参考:[TensorRT安装说明](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/install-guide/index.html#overview),Linux 端简要步骤如下: + + (1)下载 TensorRT8.2 版本,文件名 TensorRT-XXX.tar.gz,[下载链接](https://developer.nvidia.com/tensorrt) + + (2)解压得到 TensorRT-XXX 文件夹 + + (3)通过 export LD_LIBRARY_PATH=TensorRT-XXX/lib:$LD_LIBRARY_PATH 将 lib 路径加入到 LD_LIBRARY_PATH 中 + + (4)使用 pip install 安装 TensorRT-XXX/python 中对应的 TensorRT 安装包 + +2. PaddlePaddle 预测库的安装请参考 [PaddlePaddle 预测库安装文档](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/source_compile.html),Linux 端简要步骤如下: + + (1)根据 CUDA 环境和 Python 版本下载对应的 PaddlePaddle 预测库,注意须下载支持 TensorRT 的预测包,如 linux-cuda11.2-cudnn8.2-trt8-gcc8.2。[PaddlePaddle 预测库下载路径](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/download_lib.html#python) + + (2)使用 pip install 安装下载好的 PaddlePaddle 预测库 + + +## 2. 
序列标注模型推理 +### 2.1 模型获取 +用户可使用自己训练的模型进行推理,具体训练调优方法可参考[模型训练调优](./../../README.md#微调),也可以使用我们提供的 msra_ner 数据集训练的 ERNIE 3.0 模型,请执行如下命令获取模型: +``` +# 获取序列标注FP32模型 +wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/msra_ner_pruned_infer_model.zip +unzip msra_ner_pruned_infer_model.zip +``` +### 2.2 CPU端推理样例 +在 CPU 端,请使用如下命令进行部署 +``` +python infer_cpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 +``` +输出打印如下: +``` +input data: 北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。 +The model detects all entities: +entity: 北京 label: LOC pos: [0, 1] +entity: 重庆 label: LOC pos: [6, 7] +entity: 成都 label: LOC pos: [12, 13] +----------------------------- +input data: 乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。 +The model detects all entities: +entity: 乔丹 label: PER pos: [0, 1] +entity: 科比 label: PER pos: [3, 4] +entity: 詹姆斯 label: PER pos: [6, 8] +entity: 姚明 label: PER pos: [10, 11] +----------------------------- +``` +infer_cpu.py 脚本中的参数说明: +| 参数 |参数说明 | +|----------|--------------| +|--task_name | 配置任务名称,可选 seq_cls 或 token_cls,默认为 seq_cls| +|--model_name_or_path | 模型的路径或者名字,默认为 ernie-3.0-medium-zh| +|--model_path | 用于推理的 Paddle 模型的路径| +|--max_seq_length |最大序列长度,默认为 128| +|--precision_mode | 推理精度,可选 fp32,fp16 或者 int8,当输入非量化模型并设置 int8 时使用动态量化进行加速,默认 fp32 | +|--num_threads | 配置 cpu 的线程数,默认为 cpu 的最大线程数 | + +**Note**:在支持 avx512_vnni 指令集或 Intel® DL Boost 的 CPU 设备上,可设置 precision_mode 为 int8 对 FP32 模型进行动态量化以获得更高的推理性能,具体性能提升情况请查阅[量化性能提升情况](../../README.md#压缩效果)。 +CPU 端,开启动态量化的命令如下: +``` +python infer_cpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 --precision_mode int8 +``` +INT8 的输出打印和 FP32 的输出打印一致。 + +### 2.3 GPU端推理样例 +在 GPU 端,请使用如下命令进行部署 +``` +python infer_gpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 +``` +输出打印如下: +``` +input data: 北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。 +The model detects all entities: +entity: 北京 label: LOC pos: [0, 1] +entity: 重庆 label: LOC pos: [6, 7] +entity: 成都 label: LOC pos: [12, 13] +----------------------------- +input data: 乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。 +The model detects all entities: +entity: 乔丹 label: PER pos: [0, 1] +entity: 科比 label: PER pos: [3, 4] +entity: 詹姆斯 label: PER pos: [6, 8] +entity: 姚明 label: PER pos: [10, 11] +----------------------------- +``` +如果需要 FP16 进行加速,可以设置 precision_mode 为 fp16,具体命令为 +``` +python infer_gpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 --precision_mode fp16 +``` +如果需要进行 INT8 量化加速,还需要使用量化脚本对训练好的 FP32 模型进行量化,然后使用量化后的模型进行部署,模型的量化请参考:[模型量化脚本使用说明](./../../README.md#模型压缩),也可下载我们量化后的 INT8 模型进行部署,请执行如下命令获取模型: +``` +# 获取序列标注 INT8 量化模型 +wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/msra_ner_quant_infer_model.zip +unzip msra_ner_quant_infer_model.zip +``` +量化模型的部署命令为: +``` +# 第一步,打开 set_dynamic_shape 开关,自动配置动态shape,在当前目录下生成 dynamic_shape_info.txt 文件 +python infer_gpu.py --task_name token_cls --model_path ./msra_ner_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape +# 第二步,读取上一步中生成的 dynamic_shape_info.txt 文件,开启预测 +python infer_gpu.py --task_name token_cls --model_path ./msra_ner_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt +``` +FP16 和 INT8 推理的运行结果和FP32的运行结果一致。 + +infer_gpu.py 脚本中的参数说明: +| 参数 |参数说明 | +|----------|--------------| +|--task_name | 配置任务名称,可选 seq_cls 或 token_cls,默认为 seq_cls| +|--model_name_or_path | 模型的路径或者名字,默认为ernie-3.0-medium-zh| +|--model_path | 用于推理的 Paddle 模型的路径| +|--batch_size |最大可测的 batch size,默认为 32| +|--max_seq_length |最大序列长度,默认为 128| +|--shape_info_file | 指定 dynamic shape 
info 的存储文件名,默认为 shape_info.txt | +|--set_dynamic_shape | 配置是否自动配置 TensorRT 的 dynamic shape,在GPU上INT8量化推理时需要先开启此选项进行 dynamic shape 配置,生成 shape_info.txt 后再关闭,默认关闭 | +|--precision_mode | 推理精度,可选 fp32,fp16 或者 int8,默认 fp32 | + +## 3. 分类模型推理 +### 3.1 模型获取 +用户可使用自己训练的模型进行推理,具体训练调优方法可参考[模型训练调优](./../../README.md#微调),也可以使用我们提供的 tnews 数据集训练的 ERNIE 3.0 模型,请执行如下命令获取模型: +``` +# 分类模型模型: +wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_pruned_infer_model.zip +unzip tnews_pruned_infer_model.zip +``` +### 3.2 CPU端推理样例 +在 CPU 端,请使用如下命令进行部署 +``` +python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? +seq cls result: +label: news_car confidence: 0.5543532371520996 +----------------------------- +input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 +seq cls result: +label: news_entertainment confidence: 0.9495906829833984 +----------------------------- +``` +和序列标注模型推理类似,使用动态量化进行加速的命令如下: +``` +python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode int8 +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? +seq cls result: +label: news_car confidence: 0.5778735876083374 +----------------------------- +input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 +seq cls result: +label: news_entertainment confidence: 0.9206441044807434 +----------------------------- +``` +### 3.3 GPU端推理样例 +在 GPU 端,请使用如下命令进行部署 +``` +python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? +seq cls result: +label: news_car confidence: 0.5543532371520996 +----------------------------- +input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 +seq cls result: +label: news_entertainment confidence: 0.9495906829833984 +----------------------------- +``` +如果需要 FP16 进行加速,可以设置 precision_mode 为 fp16,具体命令为 +``` +python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode fp16 +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? +seq cls result: +label: news_car confidence: 0.5536671876907349 +----------------------------- +input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 +seq cls result: +label: news_entertainment confidence: 0.9494127035140991 +----------------------------- +``` +如果需要进行 INT8 量化加速,还需要使用量化脚本对训练好的 FP32 模型进行量化,然后使用量化后的模型进行部署,模型的量化请参考:[模型量化脚本使用说明](./../../README.md#模型压缩),也可下载我们量化后的 INT8 模型进行部署,请执行如下命令获取模型: +``` +# 获取序列标注 INT8 量化模型 +wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_quant_infer_model.zip +unzip tnews_quant_infer_model.zip +``` +量化模型的部署命令为: +``` +# 第一步,打开 set_dynamic_shape 开关,自动配置动态shape,在当前目录下生成 dynamic_shape_info.txt 文件 +python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape +# 第二步,读取上一步中生成的 dynamic_shape_info.txt 文件,开启预测 +python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? 
+## 3. Classification model inference
+### 3.1 Obtain the model
+You can run inference with a model you fine-tuned yourself (see [the fine-tuning guide](./../../README.md#微调) for training details), or use the ERNIE 3.0 model we trained on the tnews dataset. Run the following commands to download it:
+```
+# Download the classification FP32 model
+wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_pruned_infer_model.zip
+unzip tnews_pruned_infer_model.zip
+```
+### 3.2 CPU inference example
+On CPU, deploy with the following command:
+```
+python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5543532371520996
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9495906829833984
+-----------------------------
+```
+As with the sequence labeling model, dynamic quantization can be used for acceleration:
+```
+python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode int8
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5778735876083374
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9206441044807434
+-----------------------------
+```
+### 3.3 GPU inference example
+On GPU, deploy with the following command:
+```
+python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5543532371520996
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9495906829833984
+-----------------------------
+```
+To speed inference up with FP16, set precision_mode to fp16:
+```
+python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode fp16
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5536671876907349
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9494127035140991
+-----------------------------
+```
+For INT8 quantized inference, the trained FP32 model first has to be quantized with the quantization script and the quantized model is then deployed; see [the model compression guide](./../../README.md#模型压缩). Alternatively, download our pre-quantized INT8 model with the following commands:
+```
+# Download the classification INT8 quantized model
+wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_quant_infer_model.zip
+unzip tnews_quant_infer_model.zip
+```
+The quantized model is deployed in two steps:
+```
+# Step 1: turn on the set_dynamic_shape switch to collect the dynamic shapes automatically; this writes dynamic_shape_info.txt to the current directory
+python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape
+# Step 2: read the dynamic_shape_info.txt generated in step 1 and run inference
+python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5510320067405701
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9432708024978638
+-----------------------------
+```
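+
+For the classification task, `ErniePredictor.predict` returns a dict with `label` (class ids) and `confidence` (softmax probabilities). A small sketch of mapping the ids back to the TNEWS label names, assuming a predictor constructed as in the earlier sketch but with task_name="seq_cls" and the classification model path (the label list below is the one hard-coded in `ernie_predictor.py`):
+```
+label_list = [
+    "news_story", "news_culture", "news_entertainment", "news_sports",
+    "news_finance", "news_house", "news_car", "news_edu", "news_tech",
+    "news_military", "news_travel", "news_world", "news_stock",
+    "news_agriculture", "news_game"
+]
+result = predictor.predict(["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?"])
+for label_id, score in zip(result["label"].reshape(-1), result["confidence"].reshape(-1)):
+    print(label_list[int(label_id)], float(score))
+```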
diff --git a/model_zoo/text/ernie-3.0/ernie_predictor.py b/model_zoo/text/ernie-3.0/ernie_predictor.py
new file mode 100755
index 00000000000..61162de970b
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/ernie_predictor.py
@@ -0,0 +1,242 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import six
+import os
+import numpy as np
+# import paddle
+from psutil import cpu_count
+from paddlenlp.transformers import AutoTokenizer
+import fastdeploy
+
+
+def token_cls_print_ret(infer_result, input_data):
+    rets = infer_result["value"]
+    for i, ret in enumerate(rets):
+        print("input data:", input_data[i])
+        print("The model detects all entities:")
+        for iterm in ret:
+            print("entity:", iterm["entity"], " label:", iterm["label"],
+                  " pos:", iterm["pos"])
+        print("-----------------------------")
+
+
+def seq_cls_print_ret(infer_result, input_data):
+    label_list = [
+        "news_story", "news_culture", "news_entertainment", "news_sports",
+        "news_finance", "news_house", "news_car", "news_edu", "news_tech",
+        "news_military", "news_travel", "news_world", "news_stock",
+        "news_agriculture", "news_game"
+    ]
+    # label holds class ids and confidence holds softmax scores; reshape(-1)
+    # keeps them one-dimensional even when the batch has a single sample.
+    label = infer_result["label"].reshape(-1).tolist()
+    confidence = infer_result["confidence"].reshape(-1).tolist()
+    for i in range(len(label)):
+        print("input data:", input_data[i])
+        print("seq cls result:")
+        print("label:", label_list[label[i]], " confidence:", confidence[i])
+        print("-----------------------------")
+
+
+class ErniePredictor(object):
+    def __init__(self, args):
+        if not isinstance(args.device, six.string_types):
+            print(
+                ">>> [InferBackend] The type of device must be string, but the type you set is: ",
+                type(args.device))
+            exit(0)
+        args.device = args.device.lower()
+        if args.device not in ['cpu', 'gpu', 'xpu']:
+            print(
+                ">>> [InferBackend] The device must be cpu, gpu or xpu, but your device is set to:",
+                args.device)
+            exit(0)
+
+        self.task_name = args.task_name
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            args.model_name_or_path, use_faster=True)
+        if args.task_name == 'seq_cls':
+            self.label_names = []
+            self.preprocess = self.seq_cls_preprocess
+            self.postprocess = self.seq_cls_postprocess
+            self.printer = seq_cls_print_ret
+        elif args.task_name == 'token_cls':
+            self.label_names = [
+                'O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC'
+            ]
+            self.preprocess = self.token_cls_preprocess
+            self.postprocess = self.token_cls_postprocess
+            self.printer = token_cls_print_ret
+        else:
+            print(
+                "[ErniePredictor]: task_name only support seq_cls and token_cls now."
+            )
+            exit(0)
+
+        self.max_seq_length = args.max_seq_length
+
+        if args.device == 'cpu':
+            args.set_dynamic_shape = False
+            args.shape_info_file = None
+            args.batch_size = 32
+        if args.device == 'gpu':
+            args.num_threads = cpu_count(logical=False)
+        # Set the runtime option
+        runtime_option = fastdeploy.RuntimeOption()
+        runtime_option.set_model_path(args.model_path + ".pdmodel",
+                                      args.model_path + ".pdiparams")
+        precision_mode = args.precision_mode.lower()
+        use_fp16 = precision_mode == "fp16"
+        # runtime_option.use_paddle_backend()
+        if args.device == 'cpu':
+            runtime_option.use_cpu()
+            runtime_option.set_cpu_thread_num(args.num_threads)
+            if use_fp16:
+                runtime_option.enable_paddle_mkldnn()
+        elif args.device == 'gpu':
+            runtime_option.use_gpu()
+            if use_fp16:
+                runtime_option.use_trt_backend()
+                runtime_option.enable_trt_fp16()
+
+        self.inference_backend = fastdeploy.Runtime(runtime_option._option)
+        if args.set_dynamic_shape:
+            # If set_dynamic_shape is turned on, all required dynamic shapes will be
+            # automatically set according to the batch_size and max_seq_length.
+            self.set_dynamic_shape(args.max_seq_length, args.batch_size)
+            exit(0)
+
+    def seq_cls_preprocess(self, input_data: list):
+        data = input_data
+        # tokenizer + pad
+        data = self.tokenizer(
+            data,
+            max_length=self.max_seq_length,
+            padding=True,
+            truncation=True)
+        input_ids = data["input_ids"]
+        token_type_ids = data["token_type_ids"]
+        return {
+            "input_ids": np.array(
+                input_ids, dtype="int64"),
+            "token_type_ids": np.array(
+                token_type_ids, dtype="int64")
+        }
+
+    def seq_cls_postprocess(self, infer_data, input_data):
+        logits = np.array(infer_data[0])
+        # Numerically stable softmax over the class axis.
+        max_value = np.max(logits, axis=1, keepdims=True)
+        exp_data = np.exp(logits - max_value)
+        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
+        out_dict = {
+            "label": probs.argmax(axis=-1),
+            "confidence": probs.max(axis=-1)
+        }
+        return out_dict
+
+    def token_cls_preprocess(self, data: list):
+        # tokenizer + pad
+        is_split_into_words = False
+        if isinstance(data[0], list):
+            is_split_into_words = True
+        data = self.tokenizer(
+            data,
+            max_length=self.max_seq_length,
+            padding=True,
+            truncation=True,
+            is_split_into_words=is_split_into_words)
+        input_ids = data["input_ids"]
+        token_type_ids = data["token_type_ids"]
+        return {
+            "input_ids": np.array(
+                input_ids, dtype="int64"),
+            "token_type_ids": np.array(
+                token_type_ids, dtype="int64")
+        }
+
+    def token_cls_postprocess(self, infer_data, input_data):
+        result = np.array(infer_data[0])
+        tokens_label = result.argmax(axis=-1).tolist()
+        # Collect the entities of every sample in the batch. The tokenizer
+        # prepends a [CLS] token, so token index i maps to character index
+        # i - 1 in the original text.
+        value = []
+        for batch, token_label in enumerate(tokens_label):
+            start = -1
+            label_name = ""
+            items = []
+            for i, label in enumerate(token_label):
+                if (self.label_names[label] == "O" or
+                        "B-" in self.label_names[label]) and start >= 0:
+                    entity = input_data[batch][start:i - 1]
+                    if isinstance(entity, list):
+                        entity = "".join(entity)
+                    items.append({
+                        "pos": [start, i - 2],
+                        "entity": entity,
+                        "label": label_name,
+                    })
+                    start = -1
+                if "B-" in self.label_names[label]:
+                    start = i - 1
+                    label_name = self.label_names[label][2:]
+            if start >= 0:
+                entity = input_data[batch][start:len(token_label) - 1]
+                if isinstance(entity, list):
+                    entity = "".join(entity)
+                items.append({
+                    "pos": [start, len(token_label) - 1],
+                    "entity": entity,
+                    "label": label_name
+                })
+            value.append(items)
+
+        out_dict = {"value": value, "tokens_label": tokens_label}
+        return out_dict
+
+    def set_dynamic_shape(self, max_seq_length, batch_size):
+        # The dynamic shape info required by TRT is automatically generated
+        # according to max_seq_length and batch_size and stored in shape_info.txt
+        min_batch_size, max_batch_size, opt_batch_size = 1, batch_size, batch_size
+        min_seq_len, max_seq_len, opt_seq_len = 2, max_seq_length, max_seq_length
+        batches = [
+            {
+                "input_ids": np.zeros(
+                    [min_batch_size, min_seq_len], dtype="int64"),
+                "token_type_ids": np.zeros(
+                    [min_batch_size, min_seq_len], dtype="int64")
+            },
+            {
+                "input_ids": np.zeros(
+                    [max_batch_size, max_seq_len], dtype="int64"),
+                "token_type_ids": np.zeros(
+                    [max_batch_size, max_seq_len], dtype="int64")
+            },
+            {
+                "input_ids": np.zeros(
+                    [opt_batch_size, opt_seq_len], dtype="int64"),
+                "token_type_ids": np.zeros(
+                    [opt_batch_size, opt_seq_len], dtype="int64")
+            },
+        ]
+        # Run one warm-up inference per shape so the backend records them.
+        for batch in batches:
+            self.inference_backend.infer(batch)
+        print(
+            "[InferBackend] Set dynamic shape finished, please close set_dynamic_shape and restart."
+        )
+
+    def infer(self, data):
+        return self.inference_backend.infer(data)
+
+    def predict(self, input_data: list):
+        preprocess_result = self.preprocess(input_data)
+        infer_result = self.infer(preprocess_result)
+        result = self.postprocess(infer_result, input_data)
+        self.printer(result, input_data)
+        return result
diff --git a/model_zoo/text/ernie-3.0/infer_cpu.py b/model_zoo/text/ernie-3.0/infer_cpu.py
new file mode 100755
index 00000000000..3ab8121a529
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/infer_cpu.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import argparse
+from psutil import cpu_count
+from ernie_predictor import ErniePredictor
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    # Required parameters
+    parser.add_argument(
+        "--task_name",
+        default='seq_cls',
+        type=str,
+        help="The name of the task to perform predict, selected in: seq_cls and token_cls"
+    )
+    parser.add_argument(
+        "--model_name_or_path",
+        default="ernie-3.0-medium-zh",
+        type=str,
+        help="The directory or name of model.", )
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        required=True,
+        help="The path prefix of inference model to be used.", )
+    parser.add_argument(
+        "--max_seq_length",
+        default=128,
+        type=int,
+        help="The maximum total input sequence length after tokenization. Sequences longer "
+        "than this will be truncated, sequences shorter will be padded.", )
+    parser.add_argument(
+        "--precision_mode",
+        type=str,
+        default="fp32",
+        choices=["fp32", "int8"],
+        help="Inference precision, set int8 to use dynamic quantization for acceleration.",
+    )
+    parser.add_argument(
+        "--num_threads",
+        default=cpu_count(logical=False),
+        type=int,
+        help="num_threads for cpu.", )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+
+    args.task_name = args.task_name.lower()
+    args.device = 'cpu'
+    predictor = ErniePredictor(args)
+
+    if args.task_name == 'seq_cls':
+        text = ["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?", "黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤"]
+    elif args.task_name == 'token_cls':
+        text = ["北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。", "乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。"]
+
+    outputs = predictor.predict(text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/model_zoo/text/ernie-3.0/infer_gpu.py b/model_zoo/text/ernie-3.0/infer_gpu.py
new file mode 100755
index 00000000000..4175a29290c
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/infer_gpu.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import argparse
+from ernie_predictor import ErniePredictor
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    # Required parameters
+    parser.add_argument(
+        "--task_name",
+        default='seq_cls',
+        type=str,
+        help="The name of the task to perform predict, selected in: seq_cls and token_cls"
+    )
+    parser.add_argument(
+        "--model_name_or_path",
+        default="ernie-3.0-medium-zh",
+        type=str,
+        help="The directory or name of model.", )
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        required=True,
+        help="The path prefix of inference model to be used.", )
+    parser.add_argument(
+        "--batch_size",
+        default=32,
+        type=int,
+        help="Batch size for predict.", )
+    parser.add_argument(
+        "--max_seq_length",
+        default=128,
+        type=int,
+        help="The maximum total input sequence length after tokenization. Sequences longer "
+        "than this will be truncated, sequences shorter will be padded.", )
+    parser.add_argument(
+        "--set_dynamic_shape",
+        action='store_true',
+        help="Whether to automatically set dynamic shape.", )
+    parser.add_argument(
+        "--shape_info_file",
+        default="shape_info.txt",
+        type=str,
+        help="The collected dynamic shape info file.", )
+    parser.add_argument(
+        "--precision_mode",
+        type=str,
+        default="fp32",
+        choices=["fp32", "fp16", "int8"],
+        help="Inference precision.", )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    args.task_name = args.task_name.lower()
+    args.device = 'gpu'
+    predictor = ErniePredictor(args)
+
+    if args.task_name == 'seq_cls':
+        text = ["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?", "黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤"]
+    elif args.task_name == 'token_cls':
+        text = ["北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。", "乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。"]
+
+    outputs = predictor.predict(text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/model_zoo/text/ernie-3.0/requirements_cpu.txt b/model_zoo/text/ernie-3.0/requirements_cpu.txt
new file mode 100755
index 00000000000..9725b91945d
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/requirements_cpu.txt
@@ -0,0 +1,3 @@
+onnxruntime
+psutil
+paddlenlp
diff --git a/model_zoo/text/ernie-3.0/requirements_gpu.txt b/model_zoo/text/ernie-3.0/requirements_gpu.txt
new file mode 100755
index 00000000000..bd5e113bfdc
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/requirements_gpu.txt
@@ -0,0 +1,4 @@
+onnxruntime-gpu
+onnxconverter-common
+psutil
+paddlenlp