diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 96faaae05d8c00..dd922cd45dd34c 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1992,7 +1992,7 @@ CreatePaddlePredictor(
     LOG(ERROR) << "Allocate too much memory for the GPU memory pool, assigned "
                << config.memory_pool_init_size_mb() << " MB";
-    LOG(ERROR) << "Try to shink the value by setting "
+    LOG(ERROR) << "Try to shrink the value by setting "
                   "AnalysisConfig::EnableUseGpu(...)";
   }
   if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) {
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 252105b81900f0..24e8cc1cbe8502 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -309,7 +309,7 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
+  /// of op output. when using this function, memory reuse should be turned off.
   /// The hook function signature is void(const std::string&, const
   /// std::string&, const paddle::Tensor&>). Here, the first parameter is op's
   /// type, the second param is output var name of the op, and the third
diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc
index e58849b6c28fb9..da29b3124fa72b 100644
--- a/paddle/fluid/inference/api/api.cc
+++ b/paddle/fluid/inference/api/api.cc
@@ -122,7 +122,7 @@ void PaddleBuf::Free() {
 NativeConfig::NativeConfig() {
   LOG(WARNING) << "The paddle::NativeConfig interface is going to be "
-                  "deprecated in the next release, plase use the latest "
+                  "deprecated in the next release, please use the latest "
                   "paddle_infer::Config instead.";
 }
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index d886885edb5ba5..c8eaa1c3ebd1e4 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -63,7 +63,7 @@ bool NativePaddlePredictor::Init(
     std::shared_ptr parent_scope) {
   VLOG(3) << "Predictor::init()";
   if (FLAGS_profile) {
-    LOG(WARNING) << "Profiler is actived, might affect the performance";
+    LOG(WARNING) << "Profiler is activated, might affect the performance";
     LOG(INFO) << "You can turn off by set gflags '-profile false'";
     auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index eee3a707a03b14..fc180e761c5614 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -115,7 +115,7 @@ T *Tensor::mutable_data(PlaceType place) {
     auto *dev_ctxs = reinterpret_cast>> *>(
-        device_contexs_);
+        device_contexts_);
     auto *dev_ctx = static_cast(dev_ctxs->at(gpu_place).get().get());
     return dev_ctx->Alloc(tensor, tensor->numel() * sizeof(T));
@@ -214,7 +214,7 @@ void Tensor::CopyFromCpu(const T *data) {
     auto *dev_ctxs = reinterpret_cast>> *>(
-        device_contexs_);
+        device_contexts_);
     auto *dev_ctx = static_cast(dev_ctxs->at(gpu_place).get().get());
     auto *t_data = dev_ctx->Alloc(tensor, tensor->numel() * sizeof(T));
@@ -429,7 +429,7 @@ void Tensor::CopyToCpuImpl(T *data,
     auto *dev_ctxs = reinterpret_cast>> *>(
-        device_contexs_);
+        device_contexts_);
     auto *dev_ctx = static_cast(dev_ctxs->at(gpu_place).get().get());
     paddle::memory::Copy(paddle::platform::CPUPlace(),
@@ -672,7 +672,7 @@ template PD_INFER_DECL bfloat16 *Tensor::mutable_data(
 template PD_INFER_DECL bool *Tensor::mutable_data(PlaceType place);
 Tensor::Tensor(void *scope, const void *device_contexts)
-    : scope_{scope}, device_contexs_(device_contexts) {}
+    : scope_{scope}, device_contexts_(device_contexts) {}
 template
 void *Tensor::FindTensor() const {
diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc
index 36c312fb793a27..da20870eb0f5c1 100644
--- a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc
+++ b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc
@@ -19,7 +19,7 @@ namespace paddle {
 MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
-  // The default configuration of scale computing algorightms
+  // The default configuration of scale computing algorithms
   rules_["conv2d"]["Input"] = ScaleAlgo::KL;
   rules_["conv2d"]["Filter"] = ScaleAlgo::MAX_CH;
   rules_["conv2d"]["Bias"] = ScaleAlgo::NONE;  // do not compute scale
diff --git a/paddle/fluid/inference/api/onnxruntime_predictor.h b/paddle/fluid/inference/api/onnxruntime_predictor.h
index c983f8acdae281..b52a40d29ff26b 100644
--- a/paddle/fluid/inference/api/onnxruntime_predictor.h
+++ b/paddle/fluid/inference/api/onnxruntime_predictor.h
@@ -167,7 +167,7 @@ class ONNXRuntimePredictor : public PaddlePredictor {
   ///
   std::map> GetInputTensorShape() override;
-  /// Not supoort
+  /// Not support
   bool Run(const std::vector &inputs,
            std::vector *output_data,
            int batch_size = -1) override;
@@ -216,7 +216,7 @@ class ONNXRuntimePredictor : public PaddlePredictor {
   /// \brief get the Ort Value(input Tensor).
   ///
-  /// \param[in] desc ONNXDesce(name、shape、dtype)
+  /// \param[in] desc ONNXDesc(name、shape、dtype)
   ///
   /// \param[in] device_name "cpu" or "gpu" of device
   ///
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 6a3e943dec7e9a..e69710e93c8f5b 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -94,7 +94,7 @@ struct PD_INFER_DECL XpuConfig {
   // Reserved xpu global memory size for xpu_context;
   // If not set(-1), default memory size for xpu_context is 128MB in XPU2 or
-  // 64MB in XPU1. If set 1*1024*1024, memory size for xpu_conext will be 1MB;
+  // 64MB in XPU1. If set 1*1024*1024, memory size for xpu_context will be 1MB;
   int context_gm_size{-1};
   // xpu_context(from baidu::xpu::api::create_context) for execution.
   // If context is nullptr, new context will be created by default.
@@ -207,7 +207,7 @@ struct DistConfig {
 /// During inference procedure, there are many parameters(model/params path,
 /// place of inference, etc.)
 /// to be specified, and various optimizations(subgraph fusion, memory
-/// optimazation, TensorRT engine, etc.)
+/// optimization, TensorRT engine, etc.)
 /// to be done. Users can manage these settings by creating and modifying an
 /// AnalysisConfig,
 /// and loading it into AnalysisPredictor.
@@ -558,7 +558,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return string The custom device type.
   ///
   std::string custom_device_type() const { return custom_device_type_; }
-  /// \brief Get whether the custom device mixed preicsion is enabled.
+  /// \brief Get whether the custom device mixed precision is enabled.
   ///
   /// \return bool custom device mixed is enabled.
   ///
@@ -653,7 +653,7 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \brief Turn on the TensorRT engine.
-  /// The TensorRT engine will accelerate some subgraphes in the original Fluid
+  /// The TensorRT engine will accelerate some subgraphs in the original Fluid
   /// computation graph. In some models such as resnet50, GoogleNet and so on,
   /// it gains significant performance acceleration.
   ///
@@ -896,7 +896,7 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \brief Turn on the usage of Lite sub-graph engine.
   ///
-  /// \param precision_mode Precion used in Lite sub-graph engine.
+  /// \param precision_mode Precision used in Lite sub-graph engine.
   /// \param passes_filter Set the passes used in Lite sub-graph engine.
   /// \param ops_filter Operators not supported by Lite.
   ///
diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h
index 89540a91e37895..2828fd65a6ee72 100644
--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
@@ -289,12 +289,11 @@ class PD_INFER_DECL PaddlePredictor {
   }
   /// \brief Run the network with zero-copied inputs and outputs.
   /// Be inherited by AnalysisPredictor and only used in ZeroCopy scenarios.
-  /// This will save the IO copy for transfering inputs and outputs to predictor
-  /// workspace
-  /// and get some performance improvement.
-  /// To use it, one should call the AnalysisConfig.SwitchUseFeedFetchOp(false)
-  /// and then use the `GetInputTensor` and `GetOutputTensor`
-  /// to directly write or read the input/output tensors.
+  /// This will save the IO copy for transferring inputs and outputs to
+  /// predictor workspace and get some performance improvement. To use it, one
+  /// should call the AnalysisConfig.SwitchUseFeedFetchOp(false) and then use
+  /// the `GetInputTensor` and `GetOutputTensor` to directly write or read the
+  /// input/output tensors.
   /// \param switch_stream Whether the stream is switched.
   /// \return Whether the run is successful
   virtual bool ZeroCopyRun(bool switch_stream = false) { return false; }
@@ -318,7 +317,7 @@ class PD_INFER_DECL PaddlePredictor {
   ///
   /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
+  /// of op output. when using this function, memory reuse should be turned off.
   /// The hook function signature is void(const std::string&, const
   /// std::string&, const paddle::Tensor&>). Here, the first parameter is op's
   /// type, the second param is output var name of the op, and the third
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
index 9bbb494f91edd7..22cd023c1fee24 100644
--- a/paddle/fluid/inference/api/paddle_tensor.h
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -142,8 +142,8 @@ class PD_INFER_DECL Tensor {
   /// \brief Copy the tensor data to the host memory asynchronously.
   /// \param[out] data The tensor will copy the data to the address.
-  /// \param[out] exec_stream The tensor will excute copy in this stream(Only
-  /// GPU CUDA stream suppported now).
+  /// \param[out] exec_stream The tensor will execute copy in this stream(Only
+  /// GPU CUDA stream supported now).
   template
   void CopyToCpuAsync(T* data, void* exec_stream) const;
@@ -177,7 +177,7 @@ class PD_INFER_DECL Tensor {
   PlaceType place() const;
 protected:
-  explicit Tensor(void* scope, const void* device_contexs);
+  explicit Tensor(void* scope, const void* device_contexts);
   template
   void* FindTensor() const;
@@ -201,7 +201,7 @@ class PD_INFER_DECL Tensor {
   DataType dtype_;
   bool input_or_output_;
   void* scope_{nullptr};
-  const void* device_contexs_{nullptr};
+  const void* device_contexts_{nullptr};
   PlaceType place_;
   int device_;
   std::string device_type_;
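
Note (not part of the patch): the ZeroCopyRun comment corrected in paddle_api.h above summarizes the zero-copy workflow mentioned in this diff: switch off the feed/fetch ops, write inputs through GetInputTensor, call ZeroCopyRun, and read outputs through GetOutputTensor. A minimal usage sketch of that workflow follows; the model path, input shape, GPU pool size, and include path are placeholders, and exact helper names can vary between Paddle Inference versions.

// Hypothetical zero-copy inference sketch (illustrative only, not added by this patch).
#include <functional>
#include <numeric>
#include <vector>

#include "paddle_inference_api.h"  // include path depends on the install layout

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");      // placeholder model path
  config.EnableUseGpu(100, 0);         // 100 MB initial GPU memory pool on device 0 (optional)
  config.SwitchUseFeedFetchOp(false);  // required for the zero-copy path

  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);

  // Write the input directly into the predictor workspace (no extra IO copy).
  auto input_names = predictor->GetInputNames();
  auto input_t = predictor->GetInputTensor(input_names[0]);
  std::vector<int> shape{1, 3, 224, 224};  // placeholder input shape
  input_t->Reshape(shape);
  std::vector<float> input_data(1 * 3 * 224 * 224, 0.f);
  input_t->CopyFromCpu(input_data.data());

  // Run the network without feed/fetch ops.
  predictor->ZeroCopyRun();

  // Read the output back from the predictor workspace.
  auto output_names = predictor->GetOutputNames();
  auto output_t = predictor->GetOutputTensor(output_names[0]);
  auto out_shape = output_t->shape();
  int out_num = std::accumulate(
      out_shape.begin(), out_shape.end(), 1, std::multiplies<int>());
  std::vector<float> out_data(out_num);
  output_t->CopyToCpu(out_data.data());
  return 0;
}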