diff --git a/paddle/common/enforce.h b/paddle/common/enforce.h
index b734c90d0672bc..bb88d0db9e546c 100644
--- a/paddle/common/enforce.h
+++ b/paddle/common/enforce.h
@@ -187,8 +187,8 @@ class LogSimplyStrRegistrar {
 }  // namespace enforce
 }  // namespace common
 
-// TODO(zhangbopd): This is a copy from pir, and shoud be removed after merge
-// this into common enfoce namespace above.
+// TODO(zhangbopd): This is a copy from pir, and should be removed after merge
+// this into common enforce namespace above.
 template <typename T>
 inline bool is_error(const T& stat) {
   return !stat;
diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/common.cc b/paddle/fluid/distributed/auto_parallel/spmd_rules/common.cc
index 09b4d6a2189b7a..d38de8d90e2e4c 100644
--- a/paddle/fluid/distributed/auto_parallel/spmd_rules/common.cc
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/common.cc
@@ -82,7 +82,7 @@ std::unordered_map<std::string, int64_t> ShardingMergeForTensors(
     }
   }
 
-  // Resolute "mesh_dim shard by more than one axis" confict.
+  // Resolute "mesh_dim shard by more than one axis" conflict.
   // Now we just naive pick the first axis naively.
   // (TODO) use local cost model to pick the axis with lowest cost(in concern of
   // memory or communication or computation).
@@ -107,7 +107,7 @@ std::unordered_map<std::string, int64_t> ShardingMergeForTensors(
   return axis_to_dim_map;
 }
 
-// Rule1: A repicated dimension could be merged by any sharded dimension.
+// Rule1: A replicated dimension could be merged by any sharded dimension.
 // Rule2: A tensor axis could at most be sharded by one mesh dimension.
 // (TODO trigger heuristics cost model and reshard to handle axis sharded by
 // multiple dimension case.)
@@ -159,7 +159,7 @@ std::vector<int64_t> ResoluteOutputPartialDimension(
   return partial_on_dims;
 }
 
-std::string GetBroadcastAxes(const int64_t& tenosr_ndim,
+std::string GetBroadcastAxes(const int64_t& tensor_ndim,
                              const int64_t& broadcast_ndim,
                              const std::string& alphabet) {
   PADDLE_ENFORCE_GE(
@@ -170,15 +170,15 @@ std::string GetBroadcastAxes(const int64_t& tenosr_ndim,
           alphabet.size(),
           broadcast_ndim));
   PADDLE_ENFORCE_GE(broadcast_ndim,
-                    tenosr_ndim,
+                    tensor_ndim,
                     phi::errors::InvalidArgument(
-                        "broadcast ndim [%d] is less than tenosr ndim [%d]",
+                        "broadcast ndim [%d] is less than tensor ndim [%d]",
                         broadcast_ndim,
-                        tenosr_ndim));
-  if (tenosr_ndim <= 0) {
+                        tensor_ndim));
+  if (tensor_ndim <= 0) {
     return std::string();
   }
-  return alphabet.substr(broadcast_ndim - tenosr_ndim, tenosr_ndim);
+  return alphabet.substr(broadcast_ndim - tensor_ndim, tensor_ndim);
 }
 
 TensorDistAttr ReplicatedOnMesh(const TensorDistAttr& src_dist_attr) {
diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/common.h b/paddle/fluid/distributed/auto_parallel/spmd_rules/common.h
index dd493276548f08..9f6a52750580bc 100644
--- a/paddle/fluid/distributed/auto_parallel/spmd_rules/common.h
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/common.h
@@ -41,12 +41,12 @@ class SPMDRuleBase {
   // 1. Merge the Sharding (dims_mapping) among Input Tensors.
   // 2. Infer the Sharding (dims_mapping) for Output Tensors.
   // The Info of input tensors (Shape and DistAttr) are wrapped as
-  // DistTensorSpec, and op attribtue should be given as AttributeMap. The
+  // DistTensorSpec, and op attribute should be given as AttributeMap. The
   // Output is a pair consist of two vectors:
   // 1. The first vector: the merged DistAttr of input tensors.
-  // 2. The infered DistAttr of output tensors.
+  // 2. The inferred DistAttr of output tensors.
   // The Merged DistAttr might be different from the original Intput DistAttrs,
-  // which means that the corressponding input tensor need to be reshard.
+  // which means that the corresponding input tensor need to be reshard.
   virtual std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
   InferForward(const std::vector<DistTensorSpec>& input_specs,
                const paddle::framework::AttributeMap& attrs);
@@ -55,10 +55,10 @@ class SPMDRuleBase {
   // 1. Merge the Sharding (dims_mapping) among Output Tensors.
   // 2. Infer the Sharding (dims_mapping) for Input Tensors.
   // The Info of output tensors (Shape and DistAttr) are wrapped as
-  // DistTensorSpec, and op attribtue should be given as AttributeMap. The
+  // DistTensorSpec, and op attribute should be given as AttributeMap. The
   // Output is a pair consist of two vectors:
   // 1. The first vector: the merged DistAttr of output tensors.
-  // 2. The infered DistAttr of Input tensors.
+  // 2. The inferred DistAttr of Input tensors.
   virtual std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
   InferBackward(const std::vector<DistTensorSpec>& input_specs,
                 const std::vector<DistTensorSpec>& output_specs,
                 const paddle::framework::AttributeMap& attrs);
@@ -96,7 +96,7 @@ std::unordered_map<std::string, int64_t> ShardingMergeForTensors(
     const bool merge_conflicts = true);
 
 // Merge the sharding specification (dims mapping) for one tensor Axis.
-// Rule1: A repicated dimension could be merged by any sharded dimension.
+// Rule1: A replicated dimension could be merged by any sharded dimension.
 // Rule2: A tensor axis could at most be sharded by one mesh dimension.
 // (TODO trigger heuristics cost model and reshard to handle axis sharded by
 // multiple dimension case.)
@@ -111,18 +111,18 @@ int64_t ShardingMergeForAxis(const std::string& axis,
 TensorDistAttr CopyTensorDistAttrForOutput(const TensorDistAttr& src_dist_attr);
 
 // Resolute the partial mesh dimension of a output tensor, giving the
-// merged sharding specifcation of input tensors and the axis names of output
+// merged sharding specification of input tensors and the axis names of output
 // tensor. Input are
 std::vector<int64_t> ResoluteOutputPartialDimension(
     const std::unordered_map<std::string, int64_t>& axis_to_dim_map,
     const std::string& tensor_axes);
 
 // Generate the axis notation of tensor for the einsum notation of a broadcast
-// operation(alignment star from the rightmost axis). tenosr_ndim: the size of
-// the tensor. broadcast_ndim: the maxium size of tensors in this broadcast
+// operation(alignment star from the rightmost axis). tensor_ndim: the size of
+// the tensor. broadcast_ndim: the maximum size of tensors in this broadcast
 // operation. alphabet: the characters used to represent the axes of tensor.
 // length of alphabet should >= broadcast_ndim.
-std::string GetBroadcastAxes(const int64_t& tenosr_ndim,
+std::string GetBroadcastAxes(const int64_t& tensor_ndim,
                              const int64_t& broadcast_ndim,
                              const std::string& alphabet);
 
@@ -147,7 +147,7 @@ GetAxesDimsMappingPair(const std::vector<std::string>& tensor_axes,
 // the annotated axes after inferring forward or backward. The parameter axis
 // stores the axes of the tensor. "1" is a special axis, for the axis "1", set
 // its dims mapping to -1.
-// if unsharded_miss_axis, "-1" is assigend to axes that has no key in
+// if unsharded_miss_axis, "-1" is assigned to axes that has no key in
 // axis_to_dim_map.
 std::vector<int64_t> GetDimsMappingForAxes(
     const std::string& axes,
diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/replicated_spmd_rule.cc b/paddle/fluid/distributed/auto_parallel/spmd_rules/replicated_spmd_rule.cc
index 230f4267c25e1a..b0ffb47c99234d 100644
--- a/paddle/fluid/distributed/auto_parallel/spmd_rules/replicated_spmd_rule.cc
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/replicated_spmd_rule.cc
@@ -30,7 +30,7 @@ ReplicatedSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
   }
 
   // TODO(ljz): we need to know num of output and size of each output before
-  // generate the excat replicasted dist tensor attr for the current op.
+  // generate the excat replicated dist tensor attr for the current op.
   // here we just assume that only one output tensor and has the same size as
   // the first input tensor.
   return {intput_dist_attrs, {ReplicatedOnMesh(input_specs[0].dist_attr())}};
diff --git a/paddle/fluid/distributed/collective/process_group_with_stream.h b/paddle/fluid/distributed/collective/process_group_with_stream.h
index 58d1a042fec3c8..d7804d81bad126 100644
--- a/paddle/fluid/distributed/collective/process_group_with_stream.h
+++ b/paddle/fluid/distributed/collective/process_group_with_stream.h
@@ -23,7 +23,7 @@ namespace distributed {
 
 // NOTE: Notice that some backends use `stream` as an abstract conception of
 // hardward resource. We provide this base class allowing users to put
-// communications on calculation stream. In some scenorios, we found this will
+// communications on calculation stream. In some scenarios, we found this will
 // save the time of switching streams.
 class ProcessGroupWithStream : public ProcessGroup {
  public:
diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc
index 6165dfc27e38ef..c9ec0dc357d9cb 100644
--- a/paddle/fluid/distributed/collective/reducer.cc
+++ b/paddle/fluid/distributed/collective/reducer.cc
@@ -905,7 +905,7 @@ void EagerReducer::MarkVarReady(const size_t var_index,
         grad_tensor.is_selected_rows(),
         true,
         platform::errors::PreconditionNotMet(
-            "The sparse parameter[%d][%s] must have a selectedrows gradient. "
+            "The sparse parameter[%d][%s] must have a selected rows gradient. "
             "Before forward pass, the parameter type is inferred to be "
             "SelectedRows, but after backward pass, its actual type becomes "
             "LodTensor. It is currently not supported by DataParallel. "
diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
index 4190019e0d1738..8da1ef87814deb 100644
--- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
+++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
@@ -52,7 +52,7 @@ void ComputeInterceptor::DecodeMsgVars(const InterceptorMessage& msg) {
                     microbatch_scopes_.size(),
                     platform::errors::InvalidArgument(
                         "Step out of range. There are %ld "
-                        "microbatch_scopes, but recevice scope index %ld",
+                        "microbatch_scopes, but receive scope index %ld",
                         microbatch_scopes_.size(),
                         scope_id));
   auto* scope = microbatch_scopes_[scope_id];
@@ -76,7 +76,7 @@ InterceptorMessage ComputeInterceptor::PrepareVarsMsg() {
                     microbatch_scopes_.size(),
                     platform::errors::InvalidArgument(
                         "Step out of range. There are %ld "
-                        "microbatch_scopes, but recevice scope index %ld",
+                        "microbatch_scopes, but receive scope index %ld",
                         microbatch_scopes_.size(),
                         cur_scope_id_));
   auto* scope = microbatch_scopes_[cur_scope_id_];
diff --git a/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc b/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc
index 704dd16400065c..ad5595928b58aa 100644
--- a/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc
+++ b/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc
@@ -58,7 +58,7 @@ bool CondInterceptor::GetCondResult() {
                     microbatch_scopes_.size(),
                     platform::errors::InvalidArgument(
                         "Step out of range. There are %ld "
-                        "microbatch_scopes, but recevice scope index %ld",
+                        "microbatch_scopes, but receive scope index %ld",
                         microbatch_scopes_.size(),
                         cur_scope_id_));
   auto* cond_var =
diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
index c0e09af4c550b6..89150deff544a1 100644
--- a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
+++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
@@ -216,7 +216,7 @@ int32_t BrpcPsClient::InitializeFlWorker(const std::string &self_endpoint) {
     coordinator_ip_port.assign(coordinator_list[i].ip.c_str());
     coordinator_ip_port.append(":");
     coordinator_ip_port.append(std::to_string(coordinator_list[i].port));
-    VLOG(0) << "fl-ps > BrpcFlclient connetcting to coordinator: "
+    VLOG(0) << "fl-ps > BrpcFlclient connecting to coordinator: "
             << coordinator_ip_port;
     for (size_t j = 0; j < _coordinator_channels[i].size(); ++j) {
       _coordinator_channels[i][j].reset(new brpc::Channel());
@@ -383,7 +383,7 @@ int32_t BrpcPsClient::Initialize() {
 
 int DownpourBrpcClosure::check_response(size_t request_idx, int cmd_id) {
   if (_cntls[request_idx]->Failed()) {
-    LOG(ERROR) << "resquest cmd_id:" << cmd_id
+    LOG(ERROR) << "request cmd_id:" << cmd_id
                << " failed, "
                   "err:"
                << _cntls[request_idx]->ErrorText();
@@ -1622,7 +1622,7 @@ void BrpcPsClient::PushSparseTaskConsume() {
       auto sparse_task_data = _sparse_task_pool.get();
 
       task_list.clear();
-      int cur_meger_size = task_queue->Size();
+      int cur_merge_size = task_queue->Size();
 
       // task_list[0] 为一个空SparseAsyncTask, 分shard异步merge结果存入此结构。
      sparse_task_data->shared_data.resize(request_call_num);
@@ -1632,12 +1632,12 @@ void BrpcPsClient::PushSparseTaskConsume() {
      auto async_task =
          new SparseAsyncTask(sparse_task_data, table_id, push_timer);
 
-      task_list.reserve(cur_meger_size + 1);
+      task_list.reserve(cur_merge_size + 1);
 
      task_list.push_back(
          std::move(std::shared_ptr<SparseAsyncTask>(async_task)));
 
-      while (!task_queue->Empty() && merge_count < cur_meger_size) {
+      while (!task_queue->Empty() && merge_count < cur_merge_size) {
        ++merge_count;
        SparseAsyncTask *task = nullptr;
        task_queue->Get(task);
diff --git a/paddle/fluid/distributed/ps/service/graph_brpc_server.cc b/paddle/fluid/distributed/ps/service/graph_brpc_server.cc
index 945e758daeeb7f..fb0caf64fa3e6e 100644
--- a/paddle/fluid/distributed/ps/service/graph_brpc_server.cc
+++ b/paddle/fluid/distributed/ps/service/graph_brpc_server.cc
@@ -316,7 +316,7 @@ int32_t GraphBrpcService::LoadOneTable(Table *table,
     set_response_code(
         response,
         -1,
-        "PsRequestMessage.datas is requeired at least 2 for path & load_param");
+        "PsRequestMessage.datas is required at least 2 for path & load_param");
     return -1;
   }
   if (table->Load(request.params(0), request.params(1)) != 0) {