diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c1bfdde744..a030bddd004 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,22 +37,24 @@ if(NOT MSVC) endif(NOT MSVC) #############################CMAKE FOR FASTDEPLOY################################ -option(ENABLE_PADDLE_FRONTEND "if to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON) -option(WITH_GPU "if WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu" OFF) -option(ENABLE_ORT_BACKEND "if to enable onnxruntime backend." OFF) -option(ENABLE_TRT_BACKEND "if to enable tensorrt backend." OFF) -option(ENABLE_PADDLE_BACKEND "if to enable paddle backend." OFF) -option(CUDA_DIRECTORY "if build tensorrt backend, need to define path of cuda library.") -option(TRT_DIRECTORY "if build tensorrt backend, need to define path of tensorrt library.") -option(ENABLE_VISION "if to enable vision models usage." OFF) -option(ENABLE_VISION_VISUALIZE "if to enable visualize vision model result toolbox." ON) +option(ENABLE_PADDLE_FRONTEND "Whether to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON) +option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu" OFF) +option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF) +option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF) +option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF) +option(CUDA_DIRECTORY "If build tensorrt backend, need to define path of cuda library.") +option(TRT_DIRECTORY "If build tensorrt backend, need to define path of tensorrt library.") +option(ENABLE_VISION "Whether to enable vision models usage." OFF) +option(ENABLE_VISION_VISUALIZE "Whether to enable visualize vision model result toolbox." ON) +option(ENABLE_TEXT "Whether to enable text models usage." OFF) # Please don't open this flag now, some bugs exists. -option(ENABLE_OPENCV_CUDA "if to enable opencv with cuda, this will allow process image with GPU." OFF) -option(ENABLE_DEBUG "if to enable print debug information, this may reduce performance." OFF) +option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF) +option(ENABLE_DEBUG "Whether to enable print debug information, this may reduce performance." OFF) # Whether to build fastdeply with vision/text/... examples, only for testings. option(WITH_VISION_EXAMPLES "Whether to build fastdeply with vision examples" OFF) +option(WITH_TEXT_EXAMPLES "Whether to build fastdeply with text examples" OFF) # Check for 32bit system if(WIN32) @@ -98,14 +100,21 @@ if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) set(ENABLE_VISION_VISUALIZE ON CACHE BOOL "force to enable visualize vision model result toolbox" FORCE) endif() +if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) + # ENABLE_TEXT must be ON if enable text examples. 
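+  # Building the text examples without the text API would fail, so the flag is forced
+  # below. An illustrative configure line (the options shown are examples only):
+  #   cmake .. -DENABLE_ORT_BACKEND=ON -DENABLE_TEXT=ON -DWITH_TEXT_EXAMPLES=ON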
+ message(STATUS "Found WITH_TEXT_EXAMPLES ON, so, force ENABLE_TEXT ON") + set(ENABLE_TEXT ON CACHE BOOL "force to enable text models usage" FORCE) +endif() + add_definitions(-DFASTDEPLOY_LIB) file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc) file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc) file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc) file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp) file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc) +file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc) file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc) -list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS}) +list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS}) set(DEPEND_LIBS "") @@ -113,6 +122,7 @@ file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION) string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION) set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs) +include(external/eigen.cmake) if(ENABLE_PADDLE_FRONTEND) add_definitions(-DENABLE_PADDLE_FRONTEND) include(${PROJECT_SOURCE_DIR}/external/paddle2onnx.cmake) @@ -207,6 +217,12 @@ else() endif() endif() +if(ENABLE_TEXT) + add_definitions(-DENABLE_TEXT) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TEXT_SRCS}) + include(external/faster_tokenizer.cmake) +endif() + configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h) configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY) @@ -249,6 +265,15 @@ if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) add_subdirectory(examples) endif() +if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) + add_definitions(-DWITH_TEXT_EXAMPLES) + set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin) + # Avoid to add_subdirectory repeatedly + if (NOT WITH_VISION_EXAMPLES) + add_subdirectory(examples) + endif() +endif() + include(external/summary.cmake) fastdeploy_summary() if(WIN32) @@ -307,6 +332,12 @@ if(BUILD_FASTDEPLOY_PYTHON) file(GLOB_RECURSE VISION_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*_pybind.cc) list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${VISION_PYBIND_SRCS}) endif() + + if (NOT ENABLE_TEXT) + file(GLOB_RECURSE TEXT_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*_pybind.cc) + list(REMOVE_ITEM DEPLOY_PYBIND_SRCS ${TEXT_PYBIND_SRCS}) + endif() + add_library(${PY_LIBRARY_NAME} MODULE ${DEPLOY_PYBIND_SRCS}) redefine_file_macro(${PY_LIBRARY_NAME}) set_target_properties(${PY_LIBRARY_NAME} PROPERTIES PREFIX "") diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 4f4643fdfba..50910f0737c 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -7,6 +7,7 @@ set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@) set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@) set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@) set(ENABLE_VISION 
@ENABLE_VISION@) +set(ENABLE_TEXT @ENABLE_TEXT@) set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@) set(LIBRARY_NAME @LIBRARY_NAME@) @@ -87,6 +88,10 @@ if(ENABLE_VISION) endif() endif() +if (ENABLE_TEXT) +# Add dependency libs later +endif() + if(ENABLE_PADDLE_FRONTEND) find_library(PADDLE2ONNX_LIB paddle2onnx ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle2onnx/lib) list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB}) @@ -109,6 +114,7 @@ if(ENABLE_PADDLE_BACKEND) endif() message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") message(STATUS " ENABLE_VISION : ${ENABLE_VISION}") +message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}") if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4.0") diff --git a/csrcs/fastdeploy/core/fd_tensor.cc b/csrcs/fastdeploy/core/fd_tensor.cc index dbefbd9ecca..c278763cabe 100644 --- a/csrcs/fastdeploy/core/fd_tensor.cc +++ b/csrcs/fastdeploy/core/fd_tensor.cc @@ -50,6 +50,13 @@ void* FDTensor::Data() { return data.data(); } +const void* FDTensor::Data() const { + if (external_data_ptr != nullptr) { + return external_data_ptr; + } + return data.data(); +} + void FDTensor::SetExternalData(const std::vector& new_shape, const FDDataType& data_type, void* data_buffer) { dtype = data_type; diff --git a/csrcs/fastdeploy/core/fd_tensor.h b/csrcs/fastdeploy/core/fd_tensor.h index a00ff87fdfc..14c5a1142be 100644 --- a/csrcs/fastdeploy/core/fd_tensor.h +++ b/csrcs/fastdeploy/core/fd_tensor.h @@ -54,6 +54,8 @@ struct FASTDEPLOY_DECL FDTensor { // will copy to cpu store in `temporary_cpu_buffer` void* Data(); + const void* Data() const; + // Set user memory buffer for Tensor, the memory is managed by // the user it self, but the Tensor will share the memory with user // So take care with the user buffer @@ -81,4 +83,4 @@ struct FASTDEPLOY_DECL FDTensor { explicit FDTensor(const std::string& tensor_name); }; -} // namespace fastdeploy +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/core/fd_type.cc b/csrcs/fastdeploy/core/fd_type.cc index 8d624cdf270..ae70fa6e536 100644 --- a/csrcs/fastdeploy/core/fd_type.cc +++ b/csrcs/fastdeploy/core/fd_type.cc @@ -93,4 +93,31 @@ std::string Str(const FDDataType& fdt) { return out; } +template +const FDDataType TypeToDataType::dtype = UNKNOWN1; + +template <> +const FDDataType TypeToDataType::dtype = BOOL; + +template <> +const FDDataType TypeToDataType::dtype = INT16; + +template <> +const FDDataType TypeToDataType::dtype = INT32; + +template <> +const FDDataType TypeToDataType::dtype = INT64; + +template <> +const FDDataType TypeToDataType::dtype = FP32; + +template <> +const FDDataType TypeToDataType::dtype = FP64; + +template <> +const FDDataType TypeToDataType::dtype = UINT8; + +template <> +const FDDataType TypeToDataType::dtype = INT8; + } // namespace fastdeploy diff --git a/csrcs/fastdeploy/core/fd_type.h b/csrcs/fastdeploy/core/fd_type.h index 325551dfb3a..50b00dca893 100644 --- a/csrcs/fastdeploy/core/fd_type.h +++ b/csrcs/fastdeploy/core/fd_type.h @@ -54,4 +54,10 @@ enum FASTDEPLOY_DECL FDDataType { FASTDEPLOY_DECL std::string Str(const FDDataType& fdt); FASTDEPLOY_DECL int32_t FDDataTypeSize(const FDDataType& data_dtype); + +template +struct FASTDEPLOY_DECL TypeToDataType { + static const FDDataType dtype; +}; + } // namespace fastdeploy diff --git a/csrcs/fastdeploy/text.h b/csrcs/fastdeploy/text.h new file mode 100644 index 00000000000..184f0f4f916 --- /dev/null +++ b/csrcs/fastdeploy/text.h @@ -0,0 +1,19 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "fastdeploy/core/config.h" +#ifdef ENABLE_TEXT +#include "fastdeploy/text/text_model.h" +#endif diff --git a/csrcs/fastdeploy/text/common/option.h b/csrcs/fastdeploy/text/common/option.h new file mode 100644 index 00000000000..a795fd06693 --- /dev/null +++ b/csrcs/fastdeploy/text/common/option.h @@ -0,0 +1,26 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace text { + +struct FASTDEPLOY_DECL TextPreprocessOption {}; +struct FASTDEPLOY_DECL TextPostprocessOption {}; +struct FASTDEPLOY_DECL PredictionOption {}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/common/result.cc b/csrcs/fastdeploy/text/common/result.cc new file mode 100644 index 00000000000..cb7efbb73e9 --- /dev/null +++ b/csrcs/fastdeploy/text/common/result.cc @@ -0,0 +1,18 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "fastdeploy/text/common/result.h" + +namespace fastdeploy { +namespace text {} // namespace text +} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/common/result.h b/csrcs/fastdeploy/text/common/result.h new file mode 100644 index 00000000000..4a6f716a38a --- /dev/null +++ b/csrcs/fastdeploy/text/common/result.h @@ -0,0 +1,23 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace text { + +struct FASTDEPLOY_DECL Result {}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/postprocessor/postprocessor.cc b/csrcs/fastdeploy/text/postprocessor/postprocessor.cc new file mode 100644 index 00000000000..e8f71774392 --- /dev/null +++ b/csrcs/fastdeploy/text/postprocessor/postprocessor.cc @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/text/postprocessor/postprocessor.h" + +namespace fastdeploy { +namespace text { + +bool Postprocessor::Decode(const std::vector& model_result, + Result* decoded_result) const { + return true; +} + +bool Postprocessor::DecodeBatch(const std::vector& model_result, + Result* decoded_result) const { + return true; +} + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/postprocessor/postprocessor.h b/csrcs/fastdeploy/text/postprocessor/postprocessor.h new file mode 100644 index 00000000000..76f6a709000 --- /dev/null +++ b/csrcs/fastdeploy/text/postprocessor/postprocessor.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/text/common/result.h" +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace text { + +class Postprocessor { + public: + virtual bool Decode(const std::vector& model_result, + Result* decoded_result) const; + virtual bool DecodeBatch(const std::vector& model_result, + Result* decoded_result) const; +}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/preprocessor/preprocessor.cc b/csrcs/fastdeploy/text/preprocessor/preprocessor.cc new file mode 100644 index 00000000000..2e2715f61c2 --- /dev/null +++ b/csrcs/fastdeploy/text/preprocessor/preprocessor.cc @@ -0,0 +1,32 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/text/preprocessor/preprocessor.h" + +namespace fastdeploy { +namespace text { + +bool Preprocessor::Encode(const std::string& raw_text, + std::vector* encoded_tensor) const { + return true; +} + +bool Preprocessor::EncodeBatch(const std::vector& raw_texts, + std::vector* encoded_tensor) const { + return true; +} + +} // namespace text +} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/preprocessor/preprocessor.h b/csrcs/fastdeploy/text/preprocessor/preprocessor.h new file mode 100644 index 00000000000..79996709389 --- /dev/null +++ b/csrcs/fastdeploy/text/preprocessor/preprocessor.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace text { + +class Preprocessor { + public: + virtual bool Encode(const std::string& raw_text, + std::vector* encoded_tensor) const; + virtual bool EncodeBatch(const std::vector& raw_texts, + std::vector* encoded_tensor) const; +}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/text_model.cc b/csrcs/fastdeploy/text/text_model.cc new file mode 100644 index 00000000000..d5a40c0e56a --- /dev/null +++ b/csrcs/fastdeploy/text/text_model.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
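+//
+// TextModel glues the text pipeline together: Preprocessor::Encode/EncodeBatch
+// turns raw text into input FDTensors, the inherited Infer() call runs the
+// selected runtime backend, and Postprocessor::Decode/DecodeBatch converts the
+// output FDTensors into a Result. If any stage fails, Predict/PredictBatch logs
+// an FDERROR and returns false.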
+ +#include "fastdeploy/text/text_model.h" +#include "fastdeploy/text/common/option.h" +#include "fastdeploy/text/common/result.h" +#include "fastdeploy/text/postprocessor/postprocessor.h" +#include "fastdeploy/text/preprocessor/preprocessor.h" + +namespace fastdeploy { +namespace text { + +bool TextModel::Predict(const std::string& raw_text, Result* result, + const PredictionOption& option) { + // Preprocess + std::vector input_tensor; + std::vector output_tensor; + if (!preprocessor_->Encode(raw_text, &input_tensor)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + + // Inference Runtime + if (!Infer(input_tensor, &output_tensor)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + + // Postprocess + if (postprocessor_->Decode(output_tensor, result)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +bool TextModel::PredictBatch(const std::vector& raw_text_array, + Result* results, const PredictionOption& option) { + // Preprocess + std::vector input_tensor; + std::vector output_tensor; + if (!preprocessor_->EncodeBatch(raw_text_array, &input_tensor)) { + FDERROR << "Failed to preprocess input data while using model:" + << ModelName() << "." << std::endl; + return false; + } + + // Inference Runtime + if (!Infer(input_tensor, &output_tensor)) { + FDERROR << "Failed to inference while using model:" << ModelName() << "." + << std::endl; + return false; + } + + // Postprocess + if (postprocessor_->DecodeBatch(output_tensor, results)) { + FDERROR << "Failed to postprocess while using model:" << ModelName() << "." + << std::endl; + return false; + } + return true; +} + +} // namespace text +} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/text_model.h b/csrcs/fastdeploy/text/text_model.h new file mode 100644 index 00000000000..b7fbd592972 --- /dev/null +++ b/csrcs/fastdeploy/text/text_model.h @@ -0,0 +1,51 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/utils/unique_ptr.h" + +namespace fastdeploy { +namespace text { + +class Preprocessor; +class Postprocessor; +class Result; +class PredictionOption; + +class FASTDEPLOY_DECL TextModel : public FastDeployModel { + public: + virtual std::string ModelName() const { return "TextModel"; } + virtual bool Predict(const std::string& raw_text, Result* result, + const PredictionOption& option); + virtual bool PredictBatch(const std::vector& raw_text_array, + Result* result, const PredictionOption& option); + template + void SetPreprocessor(Args&&... args) { + preprocessor_ = utils::make_unique(std::forward(args)...); + } + template + void SetPostprocessor(Args&&... 
args) { + postprocessor_ = utils::make_unique(std::forward(args)...); + } + + private: + std::unique_ptr preprocessor_; + std::unique_ptr postprocessor_; +}; + +} // namespace text +} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/text_pybind.cc b/csrcs/fastdeploy/text/text_pybind.cc new file mode 100644 index 00000000000..564892f1679 --- /dev/null +++ b/csrcs/fastdeploy/text/text_pybind.cc @@ -0,0 +1,13 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. \ No newline at end of file diff --git a/csrcs/fastdeploy/utils/unique_ptr.h b/csrcs/fastdeploy/utils/unique_ptr.h new file mode 100644 index 00000000000..2f24ef70c6b --- /dev/null +++ b/csrcs/fastdeploy/utils/unique_ptr.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace fastdeploy { +namespace utils { +// Trait to select overloads and return types for MakeUnique. +template +struct MakeUniqueResult { + using scalar = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using array = std::unique_ptr; +}; +template +struct MakeUniqueResult { + using invalid = void; +}; + +// MakeUnique(...) is an early implementation of C++14 std::make_unique. +// It is designed to be 100% compatible with std::make_unique so that the +// eventual switchover will be a simple renaming operation. +template +typename MakeUniqueResult::scalar make_unique(Args &&... args) { // NOLINT + return std::unique_ptr( + new T(std::forward(args)...)); // NOLINT(build/c++11) +} + +// Overload for array of unknown bound. +// The allocation of arrays needs to use the array form of new, +// and cannot take element constructor arguments. +template +typename MakeUniqueResult::array make_unique(size_t n) { + return std::unique_ptr(new typename std::remove_extent::type[n]()); +} + +// Reject arrays of known bound. +template +typename MakeUniqueResult::invalid make_unique(Args &&... 
/* args */) = + delete; // NOLINT + +} // namespace utils +} // namespace fastdeploy diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 31ca40af3c1..770bf44da2f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -37,4 +37,14 @@ if(WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/vision) endforeach() endif() +# text examples +if(WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/text) + message(STATUS "") + message(STATUS "*************FastDeploy Examples Summary**********") + file(GLOB ALL_VISION_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/text/*.cc) + foreach(_CC_FILE ${ALL_VISION_EXAMPLE_SRCS}) + add_fastdeploy_executable(text ${_CC_FILE}) + endforeach() +endif() + # other examples ... diff --git a/examples/text/compute.h b/examples/text/compute.h new file mode 100644 index 00000000000..b279473b75c --- /dev/null +++ b/examples/text/compute.h @@ -0,0 +1,270 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "fastdeploy/core/fd_tensor.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace fastdeploy { +// EigenDim converts shape into Eigen::DSizes. +template +struct EigenDim { + using Type = Eigen::DSizes; + + static Type From(const std::vector& dims) { + Type ret; + for (int64_t d = 0; d < dims.size(); d++) { + ret[d] = dims[d]; + } + return ret; + } +}; + +// Interpret FDTensor as EigenTensor and EigenConstTensor. +template +struct EigenTensor { + using Type = Eigen::TensorMap>; + + using ConstType = + Eigen::TensorMap>; + + static Type From(FDTensor& tensor, + const std::vector& dims) { // NOLINT + return Type(reinterpret_cast(tensor.data.data()), + EigenDim::From(dims)); + } + + static Type From(FDTensor& tensor) { // NOLINT + return From(tensor, tensor.shape); + } // NOLINT + + static ConstType From(const FDTensor& tensor, + const std::vector& dims) { + return ConstType(reinterpret_cast(tensor.data.data()), + EigenDim::From(dims)); + } + + static ConstType From(const FDTensor& tensor) { + return From(tensor, tensor.shape); + } +}; + +template +struct EigenScalar { + // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. + using Type = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + using ConstType = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + + static Type From(FDTensor& tensor) { + return Type(reinterpret_cast(tensor.data.data())); + } // NOLINT + + static ConstType From(const FDTensor& tensor) { + return ConstType(reinterpret_cast(tensor.data.data())); + } +}; + +template +struct EigenVector : public EigenTensor { + // Flatten reshapes a Tensor into an EigenVector. 
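+  // The map produced here is a rank-1 view of length tensor.Numel() over the
+  // FDTensor's existing buffer, so flattening never copies data.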
+ static typename EigenVector::Type Flatten(FDTensor& tensor) { // NOLINT + return EigenVector::From(tensor, {tensor.Numel()}); + } + + static typename EigenVector::ConstType Flatten( + const FDTensor& tensor) { // NOLINT + return EigenVector::From(tensor, {tensor.Numel()}); + } +}; + +template +void ReduceFunctor(const Eigen::DefaultDevice& dev, const FDTensor& input, + FDTensor* output, const std::vector& dims, + bool keep_dim = true) { + auto x = EigenTensor::From(input); + auto x_rank = static_cast(x.dimensions().size()); + auto reduce_dim = Eigen::array(); + std::vector dims_ref = dims; + std::vector out_dims(input.shape.size()); + std::copy(input.shape.begin(), input.shape.end(), out_dims.begin()); + for (size_t i = 0; i < dims_ref.size(); ++i) { + if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i]; + out_dims[dims_ref[i]] = 1; + reduce_dim[i] = dims_ref[i]; + } + output->Allocate(out_dims, TypeToDataType::dtype); + if (keep_dim && x_rank > 1) { + const int kDelFlag = -2; + auto dims_vector = out_dims; + for (size_t i = 0; i < dims_ref.size(); ++i) { + dims_vector[dims_ref[i]] = kDelFlag; + } + dims_vector.erase(remove(dims_vector.begin(), dims_vector.end(), kDelFlag), + dims_vector.end()); + out_dims = dims_vector; + } + Functor functor; + + if (D == 1) { + auto out = EigenScalar::From(*output); + functor(dev, &x, &out, reduce_dim); + } else { + dims_ref.resize(out_dims.size()); + std::copy(out_dims.begin(), out_dims.end(), dims_ref.begin()); + for (int i = 0; i < dims_ref.size(); ++i) { + std::cerr << dims_ref[i] << ", "; + } + std::cerr << std::endl; + auto out = EigenTensor::From(*output, dims_ref); + functor(dev, &x, &out, reduce_dim); + } +} + +struct MaxFunctor { + template + void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { + y->device(dev) = x->maximum(dim); + } +}; + +struct SumFunctor { + template + void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { + y->device(dev) = x->sum(dim); + } +}; + +inline void GetBroadcastDimsArrays(const std::vector& x_dims, + const std::vector& y_dims, + int* x_dims_array, int* y_dims_array, + int* out_dims_array, const int max_dim, + const int axis) { + if (x_dims.size() > y_dims.size()) { + std::fill(y_dims_array, y_dims_array + axis, 1); + if (axis + y_dims.size() < max_dim) { + std::fill(y_dims_array + axis + y_dims.size(), y_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), x_dims_array); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), + y_dims_array + axis); + } else { + std::fill(x_dims_array, x_dims_array + axis, 1); + if (axis + x_dims.size() < max_dim) { + std::fill(x_dims_array + axis + x_dims.size(), x_dims_array + max_dim, 1); + } + std::copy(x_dims.data(), x_dims.data() + x_dims.size(), + x_dims_array + axis); + std::copy(y_dims.data(), y_dims.data() + y_dims.size(), y_dims_array); + } + + for (int i = 0; i < max_dim; i++) { + if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) || + (x_dims_array[i] == 1 && y_dims_array[i] == 1)) { + out_dims_array[i] = (std::max)(x_dims_array[i], y_dims_array[i]); + } else { + out_dims_array[i] = -1; + } + } +} + +inline int GetElementwiseIndex(const int* x_dims_array, const int max_dim, + const int* index_array) { + int index_ = 0; + for (int i = 0; i < max_dim; i++) { + if (x_dims_array[i] > 1) { + index_ = index_ * x_dims_array[i] + index_array[i]; + } + } + return index_; +} + +inline void UpdateElementwiseIndexArray(const int* out_dims_array, + const int max_dim, int* index_array) { + 
for (int i = max_dim - 1; i >= 0; --i) { + ++index_array[i]; + if (index_array[i] >= out_dims_array[i]) { + index_array[i] -= out_dims_array[i]; + } else { + break; + } + } +} + +template +void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y, + FDTensor* z, Functor func, int axis, + const bool is_xsize_larger = true) { + std::vector x_dims = x.shape; + std::vector y_dims = y.shape; + int max_dim = (std::max)(x_dims.size(), y_dims.size()); + int diff = x_dims.size() - y_dims.size(); + axis = (axis == -1 ? std::abs(diff) : axis); + std::vector x_dims_array(max_dim); + std::vector y_dims_array(max_dim); + std::vector out_dims_array(max_dim); + GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(), + y_dims_array.data(), out_dims_array.data(), max_dim, + axis); + + const T* x_data = reinterpret_cast(x.Data()); + const T* y_data = reinterpret_cast(y.Data()); + + z->Allocate(out_dims_array, TypeToDataType::dtype); + OutType* out_data = reinterpret_cast(z->MutableData()); + + const int out_size = + std::accumulate(out_dims_array.data(), out_dims_array.data() + max_dim, 1, + std::multiplies()); + int x_index, y_index; + std::vector index_array(max_dim, 0); + for (int out_index = 0; out_index < out_size; ++out_index) { + x_index = + GetElementwiseIndex(x_dims_array.data(), max_dim, index_array.data()); + y_index = + GetElementwiseIndex(y_dims_array.data(), max_dim, index_array.data()); + if (is_xsize_larger) { + out_data[out_index] = func(x_data[x_index], y_data[y_index]); + } else { + out_data[out_index] = func(y_data[y_index], x_data[x_index]); + } + + UpdateElementwiseIndexArray(out_dims_array.data(), max_dim, + index_array.data()); + } +} + +template +struct AddFunctor { + T operator()(const T& lhs, const T& rhs) { return lhs + rhs; } +}; + +template +struct SubFunctor { + T operator()(const T& lhs, const T& rhs) { return lhs - rhs; } +}; + +template +struct DivFunctor { + T operator()(const T& lhs, const T& rhs) { return lhs / rhs; } +}; + +} // namespace fastdeploy diff --git a/examples/text/ernie_tokencls.cc b/examples/text/ernie_tokencls.cc new file mode 100644 index 00000000000..4df1f570556 --- /dev/null +++ b/examples/text/ernie_tokencls.cc @@ -0,0 +1,232 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
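+// End-to-end demo of a joint domain/intent classification + slot-filling ERNIE
+// model: the batch of Chinese queries is tokenized with ErnieFasterTokenizer
+// ("ernie_vocab.txt"), input_ids/token_type_ids are fed to a fastdeploy::Runtime
+// loaded from "nano_static/model.pdmodel" / "nano_static/model.pdiparams", and
+// the outputs are post-processed with Softmax/Max (domain and intent scores)
+// plus ViterbiDecode over the transition matrix in "joint_transition.txt"
+// (slot tags). All three files are expected in the current working directory.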
+#include +#include + +#include "fastdeploy/text.h" +#include "tokenizers/ernie_faster_tokenizer.h" + +using namespace paddlenlp; + +void LoadTransitionFromFile(const std::string& file, + std::vector* transitions, int* num_tags) { + std::ifstream fin(file); + std::string curr_transition; + float transition; + int i = 0; + while (fin) { + std::getline(fin, curr_transition); + std::istringstream iss(curr_transition); + while (iss) { + iss >> transition; + transitions->push_back(transition); + } + if (curr_transition != "") { + ++i; + } + } + *num_tags = i; +} + +// Only useful for axis = -1 +template +void Softmax(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) { + auto softmax_func = [](const T* score_vec, T* softmax_vec, int label_num) { + double score_max = *(std::max_element(score_vec, score_vec + label_num)); + double e_sum = 0; + for (int j = 0; j < label_num; j++) { + softmax_vec[j] = std::exp(score_vec[j] - score_max); + e_sum += softmax_vec[j]; + } + for (int k = 0; k < label_num; k++) { + softmax_vec[k] /= e_sum; + } + }; + + std::vector output_shape; + for (int i = 0; i < input.shape.size(); ++i) { + output_shape.push_back(input.shape[i]); + } + output->Allocate(output_shape, input.dtype); + int label_num = output_shape.back(); + int batch_size = input.Numel() / label_num; + int offset = 0; + const T* input_ptr = reinterpret_cast(input.Data()); + T* output_ptr = reinterpret_cast(output->Data()); + for (int i = 0; i < batch_size; ++i) { + softmax_func(input_ptr + offset, output_ptr + offset, label_num); + offset += label_num; + } +} + +// Only useful for axis = -1 +template +void Max(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) { + std::vector output_shape; + for (int i = 0; i < input.shape.size() - 1; ++i) { + output_shape.push_back(input.shape[i]); + } + output_shape.push_back(1); + output->Allocate(output_shape, input.dtype); + int batch_size = output->Numel(); + int label_num = input.shape.back(); + int offset = 0; + const T* input_ptr = reinterpret_cast(input.Data()); + T* output_ptr = reinterpret_cast(output->Data()); + for (int i = 0; i < batch_size; ++i) { + output_ptr[i] = + *(std::max_element(input_ptr + offset, input_ptr + offset + label_num)); + offset += label_num; + } +} + +template +void ViterbiDecode(const fastdeploy::FDTensor& slot_logits, + const fastdeploy::FDTensor& trans, + fastdeploy::FDTensor* best_path) { + int batch_size = slot_logits.shape[0]; + int seq_len = slot_logits.shape[1]; + int num_tags = slot_logits.shape[2]; + best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64); + + const T* slot_logits_ptr = reinterpret_cast(slot_logits.Data()); + const T* trans_ptr = reinterpret_cast(trans.Data()); + int64_t* best_path_ptr = reinterpret_cast(best_path->Data()); + std::vector scores(num_tags); + std::copy(slot_logits_ptr, slot_logits_ptr + num_tags, scores.begin()); + std::vector> M(num_tags, std::vector(num_tags)); + for (int b = 0; b < batch_size; ++b) { + std::vector> paths; + const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags; + int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len; + for (int t = 1; t < seq_len; t++) { + for (size_t i = 0; i < num_tags; i++) { + for (size_t j = 0; j < num_tags; j++) { + auto trans_idx = i * num_tags * num_tags + j * num_tags; + auto slot_logit_idx = t * num_tags + j; + M[i][j] = scores[i] + trans_ptr[trans_idx] + + curr_slot_logits_ptr[slot_logit_idx]; + } + } + std::vector idxs; + for (size_t i = 0; i < num_tags; i++) { + T max = 0.0f; + 
int idx = 0; + for (size_t j = 0; j < num_tags; j++) { + if (M[j][i] > max) { + max = M[j][i]; + idx = j; + } + } + scores[i] = max; + idxs.push_back(idx); + } + paths.push_back(idxs); + } + int scores_max_index = 0; + float scores_max = 0.0f; + for (size_t i = 0; i < scores.size(); i++) { + if (scores[i] > scores_max) { + scores_max = scores[i]; + scores_max_index = i; + } + } + curr_best_path_ptr[seq_len - 1] = scores_max_index; + for (int i = seq_len - 2; i >= 0; i--) { + int index = curr_best_path_ptr[i + 1]; + curr_best_path_ptr[i] = paths[i][index]; + } + } +} + +int main() { + // 1. Define a ernie faster tokenizer + faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer( + "ernie_vocab.txt"); + std::vector strings_list = { + "导航去科技园二号楼", "屏幕亮度为我减小一点吧"}; + std::vector encodings; + tokenizer.EncodeBatchStrings(strings_list, &encodings); + size_t batch_size = strings_list.size(); + size_t seq_len = encodings[0].GetLen(); + for (auto&& encoding : encodings) { + std::cout << encoding.DebugString() << std::endl; + } + // 2. Initialize runtime + fastdeploy::RuntimeOption runtime_option; + runtime_option.SetModelPath("nano_static/model.pdmodel", + "nano_static/model.pdiparams"); + fastdeploy::Runtime runtime; + runtime.Init(runtime_option); + + // 3. Construct input vector + // 3.1 Convert encodings to input_ids, token_type_ids + std::vector input_ids, token_type_ids; + for (int i = 0; i < encodings.size(); ++i) { + auto&& curr_input_ids = encodings[i].GetIds(); + auto&& curr_type_ids = encodings[i].GetTypeIds(); + input_ids.insert(input_ids.end(), curr_input_ids.begin(), + curr_input_ids.end()); + token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(), + curr_type_ids.end()); + } + // 3.2 Set data to input vector + std::vector inputs(runtime.NumInputs()); + void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()}; + for (int i = 0; i < runtime.NumInputs(); ++i) { + inputs[i].SetExternalData({batch_size, seq_len}, + fastdeploy::FDDataType::INT64, inputs_ptrs[i]); + inputs[i].name = runtime.GetInputInfo(i).name; + } + + // 4. Infer + std::vector outputs(runtime.NumOutputs()); + runtime.Infer(inputs, &outputs); + + // 5. Postprocess + fastdeploy::FDTensor domain_probs, intent_probs; + Softmax(outputs[0], &domain_probs); + Softmax(outputs[1], &intent_probs); + + fastdeploy::FDTensor domain_max_probs, intent_max_probs; + Max(domain_probs, &domain_max_probs); + Max(intent_probs, &intent_max_probs); + + std::vector transition; + int num_tags; + LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags); + fastdeploy::FDTensor trans; + trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32, + transition.data()); + + fastdeploy::FDTensor best_path; + ViterbiDecode(outputs[2], trans, &best_path); + // 6. Print result + domain_max_probs.PrintInfo(); + intent_max_probs.PrintInfo(); + + batch_size = best_path.shape[0]; + seq_len = best_path.shape[1]; + const int64_t* best_path_ptr = + reinterpret_cast(best_path.Data()); + for (int i = 0; i < batch_size; ++i) { + std::cout << "best_path[" << i << "] = "; + for (int j = 0; j < seq_len; ++j) { + std::cout << best_path_ptr[i * seq_len + j] << ", "; + } + std::cout << std::endl; + } + best_path.PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/external/eigen.cmake b/external/eigen.cmake new file mode 100644 index 00000000000..2248ee0fdbf --- /dev/null +++ b/external/eigen.cmake @@ -0,0 +1,66 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +# update eigen to the commit id f612df27 on 03/16/2021 +set(EIGEN_PREFIX_DIR ${THIRD_PARTY_PATH}/eigen3) +set(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3/src/extern_eigen3) +set(EIGEN_REPOSITORY https://gitlab.com/libeigen/eigen.git) +set(EIGEN_TAG f612df273689a19d25b45ca4f8269463207c4fee) + +if(WIN32) + add_definitions(-DEIGEN_STRONG_INLINE=inline) +elseif(LINUX) + if(WITH_ROCM) + # For HIPCC Eigen::internal::device::numeric_limits is not EIGEN_DEVICE_FUNC + # which will cause compiler error of using __host__ funciont + # in __host__ __device__ + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src) + file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/Eigen/src/Core/util/Meta.h + native_dst) + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/TensorReductionGpu.h + native_src1) + file( + TO_NATIVE_PATH + ${EIGEN_SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h + native_dst1) + set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst} && cp ${native_src1} + ${native_dst1}) + endif() +endif() + +set(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}) +include_directories(${EIGEN_INCLUDE_DIR}) + +ExternalProject_Add( + extern_eigen3 + GIT_REPOSITORY ${EIGEN_REPOSITORY} + GIT_TAG ${EIGEN_TAG} + PREFIX ${EIGEN_PREFIX_DIR} + UPDATE_COMMAND "" + PATCH_COMMAND ${EIGEN_PATCH_COMMAND} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") + +add_library(eigen3 INTERFACE) + +add_dependencies(eigen3 extern_eigen3) + +# sw not support thread_local semantic +if(WITH_SW) + add_definitions(-DEIGEN_AVOID_THREAD_LOCAL) +endif() diff --git a/external/faster_tokenizer.cmake b/external/faster_tokenizer.cmake new file mode 100644 index 00000000000..fabc33db581 --- /dev/null +++ b/external/faster_tokenizer.cmake @@ -0,0 +1,79 @@ + + +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +include(ExternalProject) + +set(FASTERTOKENIZER_PROJECT "extern_faster_tokenizer") +set(FASTERTOKENIZER_PREFIX_DIR ${THIRD_PARTY_PATH}/faster_tokenizer) +set(FASTERTOKENIZER_SOURCE_DIR + ${THIRD_PARTY_PATH}/faster_tokenizer/src/${FASTERTOKENIZER_PROJECT}) +set(FASTERTOKENIZER_INSTALL_DIR ${THIRD_PARTY_PATH}/install/faster_tokenizer) +set(FASTERTOKENIZER_INC_DIR + "${FASTERTOKENIZER_INSTALL_DIR}/include" + "${FASTERTOKENIZER_INSTALL_DIR}/third_party/include" + CACHE PATH "faster_tokenizer include directory." 
FORCE) +set(FASTERTOKENIZER_LIB_DIR + "${FASTERTOKENIZER_INSTALL_DIR}/lib/" + CACHE PATH "faster_tokenizer lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${FASTERTOKENIZER_LIB_DIR}") + +include_directories(${FASTERTOKENIZER_INC_DIR}) + +# Set lib path +if(WIN32) +elseif(APPLE) +# Not support apple so far. +else() + +set(FASTERTOKENIZER_COMPILE_LIB "${FASTERTOKENIZER_LIB_DIR}/libcore_tokenizers.so" + CACHE FILEPATH "faster_tokenizer compile library." FORCE) +message("FASTERTOKENIZER_COMPILE_LIB = ${FASTERTOKENIZER_COMPILE_LIB}") +set(ICUDT_LIB "") +set(ICUUC_LIB "") +endif(WIN32) + +set(FASTERTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/faster_tokenizer/") +set(FASTERTOKENIZER_VERSION "dev") + +# Set download url +if(WIN32) +elseif(APPLE) +else() + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(FASTERTOKENIZER_FILE "faster_tokenizer-linux-aarch64-${FASTERTOKENIZER_VERSION}.tgz") + else() + set(FASTERTOKENIZER_FILE "faster_tokenizer-linux-x64-${FASTERTOKENIZER_VERSION}.tgz") + endif() +endif() +set(FASTERTOKENIZER_URL "${FASTERTOKENIZER_URL_BASE}${FASTERTOKENIZER_FILE}") + +ExternalProject_Add( + ${FASTERTOKENIZER_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${FASTERTOKENIZER_URL} + PREFIX ${FASTERTOKENIZER_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${FASTERTOKENIZER_SOURCE_DIR} ${FASTERTOKENIZER_INSTALL_DIR} + BUILD_BYPRODUCTS ${FASTERTOKENIZER_COMPILE_LIB}) + +add_library(faster_tokenizer STATIC IMPORTED GLOBAL) +set_property(TARGET faster_tokenizer PROPERTY IMPORTED_LOCATION ${FASTERTOKENIZER_COMPILE_LIB}) +add_dependencies(faster_tokenizer ${FASTERTOKENIZER_PROJECT}) +list(APPEND DEPEND_LIBS faster_tokenizer) \ No newline at end of file diff --git a/external/summary.cmake b/external/summary.cmake index bd5e7939028..754af9c3ecb 100644 --- a/external/summary.cmake +++ b/external/summary.cmake @@ -45,6 +45,7 @@ function(fastdeploy_summary) message(STATUS " TRT_DRECTORY : ${TRT_DIRECTORY}") endif() message(STATUS " ENABLE_VISION : ${ENABLE_VISION}") + message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}") message(STATUS " ENABLE_DEBUG : ${ENABLE_DEBUG}") message(STATUS " ENABLE_VISION_VISUALIZE : ${ENABLE_VISION_VISUALIZE}") endfunction() diff --git a/fastdeploy/text/__init__.py b/fastdeploy/text/__init__.py new file mode 100644 index 00000000000..7d175762cf9 --- /dev/null +++ b/fastdeploy/text/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import diff --git a/model_zoo/text/ernie-3.0/README.md b/model_zoo/text/ernie-3.0/README.md new file mode 100755 index 00000000000..c601485795e --- /dev/null +++ b/model_zoo/text/ernie-3.0/README.md @@ -0,0 +1,238 @@ +# ERNIE 3.0 Python部署指南 +本文介绍 ERNIE 3.0 Python 端的部署,包括部署环境的准备,序列标注和分类两大场景下的使用示例。 +- [ERNIE 3.0 Python 部署指南](#ERNIE3.0Python部署指南) + - [1. 
环境准备](#1-环境准备) + - [1.1 CPU 端](#11-CPU端) + - [1.2 GPU 端](#12-GPU端) + - [2. 序列标注模型推理](#2-序列标注模型推理) + - [2.1 模型获取](#21-模型获取) + - [2.2 CPU 端推理样例](#22-CPU端推理样例) + - [2.3 GPU 端推理样例](#23-GPU端推理样例) + - [3. 分类模型推理](#3-分类模型推理) + - [3.1 模型获取](#31-模型获取) + - [3.2 CPU 端推理样例](#32-CPU端推理样例) + - [3.3 GPU 端推理样例](#33-GPU端推理样例) +## 1. 环境准备 +ERNIE 3.0 的部署分为 CPU 和 GPU 两种情况,请根据你的部署环境安装对应的依赖。 +### 1.1 CPU端 +CPU 端的部署请使用如下命令安装所需依赖 +``` +pip install -r requirements_cpu.txt +``` +### 1.2 GPU端 +为了在 GPU 上获得最佳的推理性能和稳定性,请先确保机器已正确安装 NVIDIA 相关驱动和基础软件,确保 CUDA >= 11.2,CuDNN >= 8.2,并使用以下命令安装所需依赖 +``` +pip install -r requirements_gpu.txt +``` +如需使用半精度(FP16)或量化(INT8)部署,请确保GPU设备的 CUDA 计算能力 (CUDA Compute Capability) 大于 7.0,典型的设备包括 V100、T4、A10、A100、GTX 20 系列和 30 系列显卡等。同时 INT8 推理需要安装 TensorRT 以及包含 TensorRT 预测库的 PaddlePaddle。 +更多关于 CUDA Compute Capability 和精度支持情况请参考 NVIDIA 文档:[GPU硬件与支持精度对照表](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/support-matrix/index.html#hardware-precision-matrix) + +1. TensorRT 安装请参考:[TensorRT安装说明](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/install-guide/index.html#overview),Linux 端简要步骤如下: + + (1)下载 TensorRT8.2 版本,文件名 TensorRT-XXX.tar.gz,[下载链接](https://developer.nvidia.com/tensorrt) + + (2)解压得到 TensorRT-XXX 文件夹 + + (3)通过 export LD_LIBRARY_PATH=TensorRT-XXX/lib:$LD_LIBRARY_PATH 将 lib 路径加入到 LD_LIBRARY_PATH 中 + + (4)使用 pip install 安装 TensorRT-XXX/python 中对应的 TensorRT 安装包 + +2. PaddlePaddle 预测库的安装请参考 [PaddlePaddle 预测库安装文档](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/source_compile.html),Linux 端简要步骤如下: + + (1)根据 CUDA 环境和 Python 版本下载对应的 PaddlePaddle 预测库,注意须下载支持 TensorRT 的预测包,如 linux-cuda11.2-cudnn8.2-trt8-gcc8.2。[PaddlePaddle 预测库下载路径](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/download_lib.html#python) + + (2)使用 pip install 安装下载好的 PaddlePaddle 预测库 + + +## 2. 
序列标注模型推理 +### 2.1 模型获取 +用户可使用自己训练的模型进行推理,具体训练调优方法可参考[模型训练调优](./../../README.md#微调),也可以使用我们提供的 msra_ner 数据集训练的 ERNIE 3.0 模型,请执行如下命令获取模型: +``` +# 获取序列标注FP32模型 +wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/msra_ner_pruned_infer_model.zip +unzip msra_ner_pruned_infer_model.zip +``` +### 2.2 CPU端推理样例 +在 CPU 端,请使用如下命令进行部署 +``` +python infer_cpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 +``` +输出打印如下: +``` +input data: 北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。 +The model detects all entities: +entity: 北京 label: LOC pos: [0, 1] +entity: 重庆 label: LOC pos: [6, 7] +entity: 成都 label: LOC pos: [12, 13] +----------------------------- +input data: 乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。 +The model detects all entities: +entity: 乔丹 label: PER pos: [0, 1] +entity: 科比 label: PER pos: [3, 4] +entity: 詹姆斯 label: PER pos: [6, 8] +entity: 姚明 label: PER pos: [10, 11] +----------------------------- +``` +infer_cpu.py 脚本中的参数说明: +| 参数 |参数说明 | +|----------|--------------| +|--task_name | 配置任务名称,可选 seq_cls 或 token_cls,默认为 seq_cls| +|--model_name_or_path | 模型的路径或者名字,默认为 ernie-3.0-medium-zh| +|--model_path | 用于推理的 Paddle 模型的路径| +|--max_seq_length |最大序列长度,默认为 128| +|--precision_mode | 推理精度,可选 fp32,fp16 或者 int8,当输入非量化模型并设置 int8 时使用动态量化进行加速,默认 fp32 | +|--num_threads | 配置 cpu 的线程数,默认为 cpu 的最大线程数 | + +**Note**:在支持 avx512_vnni 指令集或 Intel® DL Boost 的 CPU 设备上,可设置 precision_mode 为 int8 对 FP32 模型进行动态量化以获得更高的推理性能,具体性能提升情况请查阅[量化性能提升情况](../../README.md#压缩效果)。 +CPU 端,开启动态量化的命令如下: +``` +python infer_cpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 --precision_mode int8 +``` +INT8 的输出打印和 FP32 的输出打印一致。 + +### 2.3 GPU端推理样例 +在 GPU 端,请使用如下命令进行部署 +``` +python infer_gpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 +``` +输出打印如下: +``` +input data: 北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。 +The model detects all entities: +entity: 北京 label: LOC pos: [0, 1] +entity: 重庆 label: LOC pos: [6, 7] +entity: 成都 label: LOC pos: [12, 13] +----------------------------- +input data: 乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。 +The model detects all entities: +entity: 乔丹 label: PER pos: [0, 1] +entity: 科比 label: PER pos: [3, 4] +entity: 詹姆斯 label: PER pos: [6, 8] +entity: 姚明 label: PER pos: [10, 11] +----------------------------- +``` +如果需要 FP16 进行加速,可以设置 precision_mode 为 fp16,具体命令为 +``` +python infer_gpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 --precision_mode fp16 +``` +如果需要进行 INT8 量化加速,还需要使用量化脚本对训练好的 FP32 模型进行量化,然后使用量化后的模型进行部署,模型的量化请参考:[模型量化脚本使用说明](./../../README.md#模型压缩),也可下载我们量化后的 INT8 模型进行部署,请执行如下命令获取模型: +``` +# 获取序列标注 INT8 量化模型 +wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/msra_ner_quant_infer_model.zip +unzip msra_ner_quant_infer_model.zip +``` +量化模型的部署命令为: +``` +# 第一步,打开 set_dynamic_shape 开关,自动配置动态shape,在当前目录下生成 dynamic_shape_info.txt 文件 +python infer_gpu.py --task_name token_cls --model_path ./msra_ner_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape +# 第二步,读取上一步中生成的 dynamic_shape_info.txt 文件,开启预测 +python infer_gpu.py --task_name token_cls --model_path ./msra_ner_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt +``` +FP16 和 INT8 推理的运行结果和FP32的运行结果一致。 + +infer_gpu.py 脚本中的参数说明: +| 参数 |参数说明 | +|----------|--------------| +|--task_name | 配置任务名称,可选 seq_cls 或 token_cls,默认为 seq_cls| +|--model_name_or_path | 模型的路径或者名字,默认为ernie-3.0-medium-zh| +|--model_path | 用于推理的 Paddle 模型的路径| +|--batch_size |最大可测的 batch size,默认为 32| +|--max_seq_length |最大序列长度,默认为 128| +|--shape_info_file | 指定 dynamic shape 
info 的存储文件名,默认为 shape_info.txt | +|--set_dynamic_shape | 配置是否自动配置 TensorRT 的 dynamic shape,在GPU上INT8量化推理时需要先开启此选项进行 dynamic shape 配置,生成 shape_info.txt 后再关闭,默认关闭 | +|--precision_mode | 推理精度,可选 fp32,fp16 或者 int8,默认 fp32 | + +## 3. 分类模型推理 +### 3.1 模型获取 +用户可使用自己训练的模型进行推理,具体训练调优方法可参考[模型训练调优](./../../README.md#微调),也可以使用我们提供的 tnews 数据集训练的 ERNIE 3.0 模型,请执行如下命令获取模型: +``` +# 分类模型模型: +wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_pruned_infer_model.zip +unzip tnews_pruned_infer_model.zip +``` +### 3.2 CPU端推理样例 +在 CPU 端,请使用如下命令进行部署 +``` +python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? +seq cls result: +label: news_car confidence: 0.5543532371520996 +----------------------------- +input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 +seq cls result: +label: news_entertainment confidence: 0.9495906829833984 +----------------------------- +``` +和序列标注模型推理类似,使用动态量化进行加速的命令如下: +``` +python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode int8 +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? +seq cls result: +label: news_car confidence: 0.5778735876083374 +----------------------------- +input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 +seq cls result: +label: news_entertainment confidence: 0.9206441044807434 +----------------------------- +``` +### 3.3 GPU端推理样例 +在 GPU 端,请使用如下命令进行部署 +``` +python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? +seq cls result: +label: news_car confidence: 0.5543532371520996 +----------------------------- +input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 +seq cls result: +label: news_entertainment confidence: 0.9495906829833984 +----------------------------- +``` +如果需要 FP16 进行加速,可以设置 precision_mode 为 fp16,具体命令为 +``` +python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode fp16 +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? +seq cls result: +label: news_car confidence: 0.5536671876907349 +----------------------------- +input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 +seq cls result: +label: news_entertainment confidence: 0.9494127035140991 +----------------------------- +``` +如果需要进行 INT8 量化加速,还需要使用量化脚本对训练好的 FP32 模型进行量化,然后使用量化后的模型进行部署,模型的量化请参考:[模型量化脚本使用说明](./../../README.md#模型压缩),也可下载我们量化后的 INT8 模型进行部署,请执行如下命令获取模型: +``` +# 获取序列标注 INT8 量化模型 +wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_quant_infer_model.zip +unzip tnews_quant_infer_model.zip +``` +量化模型的部署命令为: +``` +# 第一步,打开 set_dynamic_shape 开关,自动配置动态shape,在当前目录下生成 dynamic_shape_info.txt 文件 +python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape +# 第二步,读取上一步中生成的 dynamic_shape_info.txt 文件,开启预测 +python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt +``` +输出打印如下: +``` +input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? 
+## 3. Classification model inference
+### 3.1 Obtain the model
+You can run inference with a model you fine-tuned yourself (see [the fine-tuning guide](./../../README.md#微调) for training details), or use the ERNIE 3.0 model we trained on the tnews dataset. Run the following commands to download it:
+```
+# Download the classification FP32 model
+wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_pruned_infer_model.zip
+unzip tnews_pruned_infer_model.zip
+```
+### 3.2 CPU inference example
+On CPU, deploy with the following command:
+```
+python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5543532371520996
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9495906829833984
+-----------------------------
+```
+As with the sequence labeling model, dynamic quantization can be used for acceleration:
+```
+python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode int8
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5778735876083374
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9206441044807434
+-----------------------------
+```
+### 3.3 GPU inference example
+On GPU, deploy with the following command:
+```
+python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5543532371520996
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9495906829833984
+-----------------------------
+```
+To speed inference up with FP16, set precision_mode to fp16:
+```
+python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode fp16
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5536671876907349
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9494127035140991
+-----------------------------
+```
+For INT8 quantized inference, the trained FP32 model first has to be quantized with the quantization script and the quantized model is then deployed; see [the model compression guide](./../../README.md#模型压缩). Alternatively, download our pre-quantized INT8 model with the following commands:
+```
+# Download the classification INT8 quantized model
+wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_quant_infer_model.zip
+unzip tnews_quant_infer_model.zip
+```
+The quantized model is deployed in two steps:
+```
+# Step 1: turn on the set_dynamic_shape switch to collect the dynamic shapes automatically; this writes dynamic_shape_info.txt to the current directory
+python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape
+# Step 2: read the dynamic_shape_info.txt generated in step 1 and run inference
+python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt
+```
+The output is printed as follows:
+```
+input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?
+seq cls result:
+label: news_car confidence: 0.5510320067405701
+-----------------------------
+input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤
+seq cls result:
+label: news_entertainment confidence: 0.9432708024978638
+-----------------------------
+```
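+
+For the classification task, `ErniePredictor.predict` returns a dict with `label` (class ids) and `confidence` (softmax probabilities). A small sketch of mapping the ids back to the TNEWS label names, assuming a predictor constructed as in the earlier sketch but with task_name="seq_cls" and the classification model path (the label list below is the one hard-coded in `ernie_predictor.py`):
+```
+label_list = [
+    "news_story", "news_culture", "news_entertainment", "news_sports",
+    "news_finance", "news_house", "news_car", "news_edu", "news_tech",
+    "news_military", "news_travel", "news_world", "news_stock",
+    "news_agriculture", "news_game"
+]
+result = predictor.predict(["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?"])
+for label_id, score in zip(result["label"].reshape(-1), result["confidence"].reshape(-1)):
+    print(label_list[int(label_id)], float(score))
+```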
diff --git a/model_zoo/text/ernie-3.0/ernie_predictor.py b/model_zoo/text/ernie-3.0/ernie_predictor.py
new file mode 100755
index 00000000000..61162de970b
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/ernie_predictor.py
@@ -0,0 +1,242 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import six
+import os
+import numpy as np
+# import paddle
+from psutil import cpu_count
+from paddlenlp.transformers import AutoTokenizer
+import fastdeploy
+
+
+def token_cls_print_ret(infer_result, input_data):
+    rets = infer_result["value"]
+    for i, ret in enumerate(rets):
+        print("input data:", input_data[i])
+        print("The model detects all entities:")
+        for iterm in ret:
+            print("entity:", iterm["entity"], " label:", iterm["label"],
+                  " pos:", iterm["pos"])
+        print("-----------------------------")
+
+
+def seq_cls_print_ret(infer_result, input_data):
+    label_list = [
+        "news_story", "news_culture", "news_entertainment", "news_sports",
+        "news_finance", "news_house", "news_car", "news_edu", "news_tech",
+        "news_military", "news_travel", "news_world", "news_stock",
+        "news_agriculture", "news_game"
+    ]
+    # label holds class ids and confidence holds softmax scores; reshape(-1)
+    # keeps them one-dimensional even when the batch has a single sample.
+    label = infer_result["label"].reshape(-1).tolist()
+    confidence = infer_result["confidence"].reshape(-1).tolist()
+    for i in range(len(label)):
+        print("input data:", input_data[i])
+        print("seq cls result:")
+        print("label:", label_list[label[i]], " confidence:", confidence[i])
+        print("-----------------------------")
+
+
+class ErniePredictor(object):
+    def __init__(self, args):
+        if not isinstance(args.device, six.string_types):
+            print(
+                ">>> [InferBackend] The type of device must be string, but the type you set is: ",
+                type(args.device))
+            exit(0)
+        args.device = args.device.lower()
+        if args.device not in ['cpu', 'gpu', 'xpu']:
+            print(
+                ">>> [InferBackend] The device must be cpu, gpu or xpu, but your device is set to:",
+                args.device)
+            exit(0)
+
+        self.task_name = args.task_name
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            args.model_name_or_path, use_faster=True)
+        if args.task_name == 'seq_cls':
+            self.label_names = []
+            self.preprocess = self.seq_cls_preprocess
+            self.postprocess = self.seq_cls_postprocess
+            self.printer = seq_cls_print_ret
+        elif args.task_name == 'token_cls':
+            self.label_names = [
+                'O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC'
+            ]
+            self.preprocess = self.token_cls_preprocess
+            self.postprocess = self.token_cls_postprocess
+            self.printer = token_cls_print_ret
+        else:
+            print(
+                "[ErniePredictor]: task_name only support seq_cls and token_cls now."
+            )
+            exit(0)
+
+        self.max_seq_length = args.max_seq_length
+
+        if args.device == 'cpu':
+            args.set_dynamic_shape = False
+            args.shape_info_file = None
+            args.batch_size = 32
+        if args.device == 'gpu':
+            args.num_threads = cpu_count(logical=False)
+        # Set the runtime option
+        runtime_option = fastdeploy.RuntimeOption()
+        runtime_option.set_model_path(args.model_path + ".pdmodel",
+                                      args.model_path + ".pdiparams")
+        precision_mode = args.precision_mode.lower()
+        use_fp16 = precision_mode == "fp16"
+        # runtime_option.use_paddle_backend()
+        if args.device == 'cpu':
+            runtime_option.use_cpu()
+            runtime_option.set_cpu_thread_num(args.num_threads)
+            if use_fp16:
+                runtime_option.enable_paddle_mkldnn()
+        elif args.device == 'gpu':
+            runtime_option.use_gpu()
+            if use_fp16:
+                runtime_option.use_trt_backend()
+                runtime_option.enable_trt_fp16()
+
+        self.inference_backend = fastdeploy.Runtime(runtime_option._option)
+        if args.set_dynamic_shape:
+            # If set_dynamic_shape is turned on, all required dynamic shapes will be
+            # automatically set according to the batch_size and max_seq_length.
+            self.set_dynamic_shape(args.max_seq_length, args.batch_size)
+            exit(0)
+
+    def seq_cls_preprocess(self, input_data: list):
+        data = input_data
+        # tokenizer + pad
+        data = self.tokenizer(
+            data,
+            max_length=self.max_seq_length,
+            padding=True,
+            truncation=True)
+        input_ids = data["input_ids"]
+        token_type_ids = data["token_type_ids"]
+        return {
+            "input_ids": np.array(
+                input_ids, dtype="int64"),
+            "token_type_ids": np.array(
+                token_type_ids, dtype="int64")
+        }
+
+    def seq_cls_postprocess(self, infer_data, input_data):
+        logits = np.array(infer_data[0])
+        # Numerically stable softmax over the class axis.
+        max_value = np.max(logits, axis=1, keepdims=True)
+        exp_data = np.exp(logits - max_value)
+        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
+        out_dict = {
+            "label": probs.argmax(axis=-1),
+            "confidence": probs.max(axis=-1)
+        }
+        return out_dict
+
+    def token_cls_preprocess(self, data: list):
+        # tokenizer + pad
+        is_split_into_words = False
+        if isinstance(data[0], list):
+            is_split_into_words = True
+        data = self.tokenizer(
+            data,
+            max_length=self.max_seq_length,
+            padding=True,
+            truncation=True,
+            is_split_into_words=is_split_into_words)
+        input_ids = data["input_ids"]
+        token_type_ids = data["token_type_ids"]
+        return {
+            "input_ids": np.array(
+                input_ids, dtype="int64"),
+            "token_type_ids": np.array(
+                token_type_ids, dtype="int64")
+        }
+
+    def token_cls_postprocess(self, infer_data, input_data):
+        result = np.array(infer_data[0])
+        tokens_label = result.argmax(axis=-1).tolist()
+        # Collect the entities of every sample in the batch. The tokenizer
+        # prepends a [CLS] token, so token index i maps to character index
+        # i - 1 in the original text.
+        value = []
+        for batch, token_label in enumerate(tokens_label):
+            start = -1
+            label_name = ""
+            items = []
+            for i, label in enumerate(token_label):
+                if (self.label_names[label] == "O" or
+                        "B-" in self.label_names[label]) and start >= 0:
+                    entity = input_data[batch][start:i - 1]
+                    if isinstance(entity, list):
+                        entity = "".join(entity)
+                    items.append({
+                        "pos": [start, i - 2],
+                        "entity": entity,
+                        "label": label_name,
+                    })
+                    start = -1
+                if "B-" in self.label_names[label]:
+                    start = i - 1
+                    label_name = self.label_names[label][2:]
+            if start >= 0:
+                entity = input_data[batch][start:len(token_label) - 1]
+                if isinstance(entity, list):
+                    entity = "".join(entity)
+                items.append({
+                    "pos": [start, len(token_label) - 1],
+                    "entity": entity,
+                    "label": label_name
+                })
+            value.append(items)
+
+        out_dict = {"value": value, "tokens_label": tokens_label}
+        return out_dict
+
+    def set_dynamic_shape(self, max_seq_length, batch_size):
+        # The dynamic shape info required by TRT is automatically generated
+        # according to max_seq_length and batch_size and stored in shape_info.txt
+        min_batch_size, max_batch_size, opt_batch_size = 1, batch_size, batch_size
+        min_seq_len, max_seq_len, opt_seq_len = 2, max_seq_length, max_seq_length
+        batches = [
+            {
+                "input_ids": np.zeros(
+                    [min_batch_size, min_seq_len], dtype="int64"),
+                "token_type_ids": np.zeros(
+                    [min_batch_size, min_seq_len], dtype="int64")
+            },
+            {
+                "input_ids": np.zeros(
+                    [max_batch_size, max_seq_len], dtype="int64"),
+                "token_type_ids": np.zeros(
+                    [max_batch_size, max_seq_len], dtype="int64")
+            },
+            {
+                "input_ids": np.zeros(
+                    [opt_batch_size, opt_seq_len], dtype="int64"),
+                "token_type_ids": np.zeros(
+                    [opt_batch_size, opt_seq_len], dtype="int64")
+            },
+        ]
+        # Run one warm-up inference per shape so the backend records them.
+        for batch in batches:
+            self.inference_backend.infer(batch)
+        print(
+            "[InferBackend] Set dynamic shape finished, please close set_dynamic_shape and restart."
+        )
+
+    def infer(self, data):
+        return self.inference_backend.infer(data)
+
+    def predict(self, input_data: list):
+        preprocess_result = self.preprocess(input_data)
+        infer_result = self.infer(preprocess_result)
+        result = self.postprocess(infer_result, input_data)
+        self.printer(result, input_data)
+        return result
diff --git a/model_zoo/text/ernie-3.0/infer_cpu.py b/model_zoo/text/ernie-3.0/infer_cpu.py
new file mode 100755
index 00000000000..3ab8121a529
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/infer_cpu.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import argparse
+from psutil import cpu_count
+from ernie_predictor import ErniePredictor
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    # Required parameters
+    parser.add_argument(
+        "--task_name",
+        default='seq_cls',
+        type=str,
+        help="The name of the task to perform predict, selected in: seq_cls and token_cls"
+    )
+    parser.add_argument(
+        "--model_name_or_path",
+        default="ernie-3.0-medium-zh",
+        type=str,
+        help="The directory or name of model.", )
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        required=True,
+        help="The path prefix of inference model to be used.", )
+    parser.add_argument(
+        "--max_seq_length",
+        default=128,
+        type=int,
+        help="The maximum total input sequence length after tokenization. Sequences longer "
+        "than this will be truncated, sequences shorter will be padded.", )
+    parser.add_argument(
+        "--precision_mode",
+        type=str,
+        default="fp32",
+        choices=["fp32", "int8"],
+        help="Inference precision, set int8 to use dynamic quantization for acceleration.",
+    )
+    parser.add_argument(
+        "--num_threads",
+        default=cpu_count(logical=False),
+        type=int,
+        help="num_threads for cpu.", )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+
+    args.task_name = args.task_name.lower()
+    args.device = 'cpu'
+    predictor = ErniePredictor(args)
+
+    if args.task_name == 'seq_cls':
+        text = ["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?", "黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤"]
+    elif args.task_name == 'token_cls':
+        text = ["北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。", "乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。"]
+
+    outputs = predictor.predict(text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/model_zoo/text/ernie-3.0/infer_gpu.py b/model_zoo/text/ernie-3.0/infer_gpu.py
new file mode 100755
index 00000000000..4175a29290c
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/infer_gpu.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import argparse
+from ernie_predictor import ErniePredictor
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    # Required parameters
+    parser.add_argument(
+        "--task_name",
+        default='seq_cls',
+        type=str,
+        help="The name of the task to perform predict, selected in: seq_cls and token_cls"
+    )
+    parser.add_argument(
+        "--model_name_or_path",
+        default="ernie-3.0-medium-zh",
+        type=str,
+        help="The directory or name of model.", )
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        required=True,
+        help="The path prefix of inference model to be used.", )
+    parser.add_argument(
+        "--batch_size",
+        default=32,
+        type=int,
+        help="Batch size for predict.", )
+    parser.add_argument(
+        "--max_seq_length",
+        default=128,
+        type=int,
+        help="The maximum total input sequence length after tokenization. Sequences longer "
+        "than this will be truncated, sequences shorter will be padded.", )
+    parser.add_argument(
+        "--set_dynamic_shape",
+        action='store_true',
+        help="Whether to automatically set dynamic shape.", )
+    parser.add_argument(
+        "--shape_info_file",
+        default="shape_info.txt",
+        type=str,
+        help="The collected dynamic shape info file.", )
+    parser.add_argument(
+        "--precision_mode",
+        type=str,
+        default="fp32",
+        choices=["fp32", "fp16", "int8"],
+        help="Inference precision.", )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    args.task_name = args.task_name.lower()
+    args.device = 'gpu'
+    predictor = ErniePredictor(args)
+
+    if args.task_name == 'seq_cls':
+        text = ["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?", "黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤"]
+    elif args.task_name == 'token_cls':
+        text = ["北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。", "乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。"]
+
+    outputs = predictor.predict(text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/model_zoo/text/ernie-3.0/requirements_cpu.txt b/model_zoo/text/ernie-3.0/requirements_cpu.txt
new file mode 100755
index 00000000000..9725b91945d
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/requirements_cpu.txt
@@ -0,0 +1,3 @@
+onnxruntime
+psutil
+paddlenlp
diff --git a/model_zoo/text/ernie-3.0/requirements_gpu.txt b/model_zoo/text/ernie-3.0/requirements_gpu.txt
new file mode 100755
index 00000000000..bd5e113bfdc
--- /dev/null
+++ b/model_zoo/text/ernie-3.0/requirements_gpu.txt
@@ -0,0 +1,4 @@
+onnxruntime-gpu
+onnxconverter-common
+psutil
+paddlenlp