PaddlePaddle · luotao1 · Jan 29, 2024 · Jan 28, 2024 · Jan 28, 2024
diff --git a/paddle/fluid/framework/downpour_lite_worker.cc b/paddle/fluid/framework/downpour_lite_worker.cc
@@ -117,7 +117,7 @@ void DownpourLiteWorker::Initialize(const TrainerDesc& desc) {
             << dest_table;
     copy_dense_tables_.push_back(std::make_pair(src_table, dest_table));
   }
-  for (auto& m : copy_table_config_.table_denpendency_map()) {
+  for (auto& m : copy_table_config_.table_dependency_map()) {
     if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
       // currently only support one dependency
       for (auto& value : m.values()) {

diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc
@@ -116,7 +116,7 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) {
             << dest_table;
     copy_dense_tables_.emplace_back(src_table, dest_table);
   }
-  for (auto& m : copy_table_config_.table_denpendency_map()) {
+  for (auto& m : copy_table_config_.table_dependency_map()) {
     if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
       // currently only support one dependency
       for (auto& value : m.values()) {

diff --git a/paddle/fluid/framework/downpour_worker_opt.cc b/paddle/fluid/framework/downpour_worker_opt.cc
@@ -177,7 +177,7 @@ void DownpourWorkerOpt::Initialize(const TrainerDesc& desc) {
             << dest_table;
     copy_dense_tables_.emplace_back(src_table, dest_table);
   }
-  for (auto& m : copy_table_config_.table_denpendency_map()) {
+  for (auto& m : copy_table_config_.table_dependency_map()) {
     if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
       // currently only support one dependency
       for (auto& value : m.values()) {

diff --git a/paddle/fluid/framework/fleet/heter_context.h b/paddle/fluid/framework/fleet/heter_context.h
@@ -172,7 +172,7 @@ class HeterContext {
         }
       }
     } else {
-      VLOG(3) << "Reset gpu task with dynamic mf dimention";
+      VLOG(3) << "Reset gpu task with dynamic mf dimension";
       for (size_t i = 0; i < feature_dim_keys_.size(); i++) {
         for (size_t j = 0; j < feature_dim_keys_[i].size(); j++) {
           feature_dim_keys_[i][j].clear();
@@ -262,7 +262,7 @@ class HeterContext {
           threads.push_back(std::thread(unique_dynamic_mf_func, i, j));
         }
       }
-      VLOG(3) << "heter_context unique keys with dynamic mf dimention";
+      VLOG(3) << "heter_context unique keys with dynamic mf dimension";
     }
     for (std::thread& t : threads) {
       t.join();

diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
@@ -2752,7 +2752,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place,
       VLOG(3) << "Begin GPUPS PushSparseGrad";
 
       auto buf = memory::Alloc(place, total_length * grad_value_size);
-      VLOG(3) << "Push Sparse Max mf dimention: " << max_mf_dim_
+      VLOG(3) << "Push Sparse Max mf dimension: " << max_mf_dim_
               << "grad_value_size:" << grad_value_size;
       float* total_grad_values_gpu = reinterpret_cast<float*>(buf->ptr());
 
@@ -2790,7 +2790,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place,
     VLOG(3) << "Begin GPUPS PushSparseGrad";
 
     auto buf = memory::Alloc(place, total_length * grad_value_size);
-    VLOG(3) << "Push Sparse Max mf dimention: " << max_mf_dim_
+    VLOG(3) << "Push Sparse Max mf dimension: " << max_mf_dim_
             << "grad_value_size:" << grad_value_size;
     float* total_grad_values_gpu = reinterpret_cast<float*>(buf->ptr());
     phi::DenseTensor& total_keys_tensor = keys_tensor[devid_2_index];

diff --git a/paddle/fluid/framework/hetercpu_worker.cc b/paddle/fluid/framework/hetercpu_worker.cc
@@ -180,7 +180,7 @@ void HeterCpuWorker::Initialize(const TrainerDesc& desc) {
             << dest_table;
     copy_dense_tables_.push_back(std::make_pair(src_table, dest_table));
   }
-  for (auto& m : copy_table_config_.table_denpendency_map()) {
+  for (auto& m : copy_table_config_.table_dependency_map()) {
     if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
       // currently only support one dependency
       for (auto& value : m.values()) {

diff --git a/paddle/fluid/framework/hogwild_worker.cc b/paddle/fluid/framework/hogwild_worker.cc
@@ -805,7 +805,7 @@ void HogwildWorker::CreateThreadOperators(const ProgramDesc &program) {
       // depend_builder.Build(ops_, start_index, sharding_mode_);  hbm not safe
       // should run in debug model need to fix
       depend_builder.Build(ops_, start_index, false);
-      new_order = depend_builder.get_new_exexutor_order();
+      new_order = depend_builder.get_new_executor_order();
       std::vector<std::unique_ptr<OperatorBase>> new_ops;
       std::vector<size_t> final_order;
       std::vector<std::string> new_op_names;

diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc
@@ -1805,7 +1805,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph,
     auto* bv_tensor =
         scope->FindVar(eltadd2_b->Name())->GetMutable<phi::DenseTensor>();
 
-    // NOTE(minghaoBD): to make it compatible with strucutured pruning on
+    // NOTE(minghaoBD): to make it compatible with structured pruning on
     // num_head dimension:
     // 1. get dim_head from reshape.shape[3], dim_embed from
     // layer_norm_bias.shape[0]
@@ -1952,7 +1952,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph,
       auto ffn1_in_scale = PADDLE_GET_CONST(
           float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name));
 
-      // Calc outscale and Set them
+      // Calc out scale and Set them
       auto qkv_weight_scale =
           PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale"));
       auto out_weight_scale =
@@ -2629,7 +2629,7 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion(
     auto* qkv_b_tensor =
         scope->FindVar(eltadd0_b->Name())->GetMutable<phi::DenseTensor>();
 
-    // NOTE(minghaoBD): to make it compatible with strucutured pruning on
+    // NOTE(minghaoBD): to make it compatible with structured pruning on
     // num_head dimension:
     // 1. get dim_head from reshape.shape[3], dim_embed from
     // layer_norm_bias.shape[0]
@@ -2758,9 +2758,9 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion(
       auto ffn1_in_scale = PADDLE_GET_CONST(
           float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name));
 
-      // Calc outscale and Set them
+      // Calc out scale and Set them
       // TODO(wufeisheng): Currently just match layer-wise weight scale, where
-      // channel-wise weight scale should also be surpported.
+      // channel-wise weight scale should also be supported.
       auto qkv_weight_scale =
           PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale"));
       auto out_weight_scale =
@@ -4267,7 +4267,7 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion(
     auto* qkv_b_tensor =
         scope->FindVar(eltadd0_b->Name())->GetMutable<phi::DenseTensor>();
 
-    // NOTE(minghaoBD): to make it compatible with strucutured pruning on
+    // NOTE(minghaoBD): to make it compatible with structured pruning on
     // num_head dimension:
     // 1. get dim_head from reshape.shape[3], dim_embed from
     // layer_norm_bias.shape[0]
@@ -4407,7 +4407,7 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion(
       auto ffn1_in_scale = PADDLE_GET_CONST(
           float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name));
 
-      // Calc outscale and Set them
+      // Calc out scale and Set them
       auto qkv_weight_scale =
           PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale"));
       auto out_weight_scale =

diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc
@@ -452,7 +452,7 @@ PDNode* MultiHeadMatmulPattern::operator()() {
 }
 
 PDNode* MultiHeadMatmulV3Pattern::operator()() {
-  // Add mul op to support huggingface onnx model convertsion by x2paddle
+  // Add mul op to support huggingface onnx model conversion by x2paddle
   std::unordered_set<std::string> matmul_ops{"mul", "matmul", "matmul_v2"};
   auto* input0 = pattern->NewNode(input0_repr());
   input0->assert_is_ops_input(matmul_ops);

diff --git a/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.h b/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.h
@@ -24,7 +24,7 @@ namespace framework {
 namespace ir {
 namespace patterns {
 /*
- * \brief   Fuse the subgraph representing multihead attention part of roformer
+ * \brief   Fuse the subgraph representing multi-head attention part of roformer
  * into multihead_matmul_roformer op.
  *
  * \note    The following graph represents this equation:

diff --git a/paddle/fluid/framework/ir/xpu/fast_where_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/fast_where_xpu_fuse_pass.cc
@@ -438,7 +438,7 @@ CascadeFastWhereXPUPattern::CascadeFastWhereXPUPattern(
       pattern->NewNode(fast_where_xpu0_repr())->assert_is_op("fast_where_xpu");
   auto fast_where_xpu1 =
       pattern->NewNode(fast_where_xpu1_repr())->assert_is_op("fast_where_xpu");
-  // declare vairable nodes
+  // declare variable nodes
   auto condition0 = pattern->NewNode(condition0_repr())
                         ->assert_is_op_input("fast_where_xpu", "condition");
   auto condition1 = pattern->NewNode(condition1_repr())

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
@@ -320,7 +320,7 @@ void NaiveExecutor::ResetTrtOps(int num) {
 #endif
 }
 
-void NaiveExecutor::CloneLiteEnigne(int num, void *stream) {
+void NaiveExecutor::CloneLiteEngine(int num, void *stream) {
 #ifdef PADDLE_WITH_LITE
   for (auto &op : ops_) {
     if (op->Type() == "lite_engine") {

diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
@@ -90,7 +90,7 @@ class NaiveExecutor {
 
   void ResetTrtOps(int num);
 
-  void CloneLiteEnigne(int num, void* stream);
+  void CloneLiteEngine(int num, void* stream);
 
   void RegisterOutputHook(const HookFunc& hookfunc);
   void RegisterInputHook(const HookFunc& hookfunc);

diff --git a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc
@@ -77,7 +77,7 @@ void SplitFeedTensors(const std::vector<std::string>& feed_names,
                       0,
                       phi::errors::InvalidArgument(
                           "Split expects feed data (%s)'s dim[0] (%d) is "
-                          "diviable by micro_batch_num (%d).",
+                          "divisible by micro_batch_num (%d).",
                           feed_names[i],
                           numel_size,
                           micro_batch_num));
@@ -211,7 +211,7 @@ void MergeTensors(const std::vector<const phi::DenseTensor*>& tensors,
               tensor_dims[j],
               new_dim[j],
               phi::errors::InvalidArgument(
-                  "DenseTensor.ddim[%d] should eaqual to %d, but is %d",
+                  "DenseTensor.ddim[%d] should equal to %d, but is %d",
                   j,
                   new_dim[j],
                   tensor_dims[j]));

diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc
@@ -251,7 +251,7 @@ void IfInstruction::Run() {
     false_branch_inter_->Run({}, false);
     CopyBranchOutput(false_branch_outputs_, false_branch_inter_);
   }
-  // copy ouptut
+  // copy output
 }
 
 }  // namespace framework

diff --git a/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc
@@ -89,7 +89,7 @@ void CustomKernelInstruction::BuildCustomContext(
         input_ptrs_.emplace_back(nullptr);
         custom_kernel_ctx_.EmplaceBackInput(std::move(paddle::Tensor()));
       }
-      VLOG(8) << "ctx->EmplaceBackInput : an optioanl input " << t;
+      VLOG(8) << "ctx->EmplaceBackInput : an optional input " << t;
       continue;
     }
     auto in_var_name = value_exec_info_.GetVarName(ptr);
@@ -285,7 +285,7 @@ void CustomKernelInstruction::BuildCustomContext(
       cache_out_ptrs_.emplace_back(nullptr);
       custom_kernel_ctx_.EmplaceBackOutput(std::move(paddle::Tensor()));
 
-      VLOG(8) << "ctx->EmplaceBackOutput : an optioanl output";
+      VLOG(8) << "ctx->EmplaceBackOutput : an optional output";
       continue;
     }
 

diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc
@@ -32,7 +32,7 @@ PADDLE_DEFINE_EXPORTED_bool(
 
 // The difference between "sequential_run" and "serial_run":
 // "sequential_run" dispatches OPs one by one according to the sequence in the
-// Program, while "serial_run" ensures that all Ops are scheduled in a singal
+// Program, while "serial_run" ensures that all Ops are scheduled in a single
 // thread. In standalone executor, "sequential_run" is also "serial_run", while
 // "serial_run" is not necessarily "sequential_run".
 PADDLE_DEFINE_EXPORTED_bool(new_executor_sequential_run,
@@ -206,13 +206,13 @@ void DependencyBuilder::AddDependencyForCoalesceTensorOp() {
       }
 
       // find first op read 'outputs' between (first_read_fused_out_op, end)
-      // add depned:  first_read_fused_out_op -> first op that reads 'outputs'
+      // add depend:  first_read_fused_out_op -> first op that reads 'outputs'
 
       // special case for consecutive communication ops, for example,
       // FusedOutput = c_sync_calc_stream(FusedOutput)
       // FusedOutput= c_allreduce_sum(FusedOutput)
       // FusedOutput = c_sync_comm_stream(FusedOutput)
-      // we should take the last one to add depned instead of
+      // we should take the last one to add depend instead of
       // 'first_read_fused_out_op'
       size_t target = first_read_fused_out_op;
       for (size_t j = first_read_fused_out_op + 1; j < op_num_; ++j) {
@@ -355,8 +355,8 @@ void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
   std::set<size_t>& downstream_ops = (*op_downstream_map_)[prior_op_idx];
   // NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
   // ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
-  // a->c will not be shrinked in the following case: AddDownstreamOp(a, b) ->
-  // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrinked by
+  // a->c will not be shrunk in the following case: AddDownstreamOp(a, b) ->
+  // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrunk by
   // ShrinkDownstreamMap.
   for (size_t op_idx : downstream_ops) {
     if (OpHappensBefore(op_idx, posterior_op_idx)) {
@@ -531,7 +531,7 @@ void DependencyBuilder::ShrinkDownstreamMap() {
       }
     }
     // NOTE(Ruibiao): op_happens_before will not be changed when shrink
-    // dowstream map
+    // downstream map
     (*op_downstream_map_)[i] = minumum_nexts;
   }
   VLOG(8) << "Finish shrink downstream map";
@@ -963,7 +963,7 @@ void DependencyBuilderSimplify::ShrinkDownstreamMap() {
       }
     }
     // NOTE(Ruibiao): op_happens_before will not be changed when shrink
-    // dowstream map
+    // downstream map
     op_downstream_map_.at(i) = minumum_nexts;
   }
   VLOG(8) << "Finish shrink downstream map";
@@ -1031,13 +1031,13 @@ void DependencyBuilderSimplify::AddDependencyForCoalesceTensorOp() {
       }
 
       // find first op read 'outputs' between (first_read_fused_out_op, end)
-      // add depned:  first_read_fused_out_op -> first op that reads 'outputs'
+      // add depend:  first_read_fused_out_op -> first op that reads 'outputs'
 
       // special case for consecutive communication ops, for example,
       // FusedOutput = c_sync_calc_stream(FusedOutput)
       // FusedOutput= c_allreduce_sum(FusedOutput)
       // FusedOutput = c_sync_comm_stream(FusedOutput)
-      // we should take the last one to add depned instead of
+      // we should take the last one to add depend instead of
       // 'first_read_fused_out_op'
       size_t target = first_read_fused_out_op;
       for (size_t j = first_read_fused_out_op + 1; j < op_num_; ++j) {
@@ -1236,8 +1236,8 @@ void DependencyBuilderSimplify::SetSameStream() {
   }
 }
 
-// get_new_exector_order  by dfs
-std::vector<size_t> DependencyBuilderSimplify::get_new_exexutor_order() {
+// get_new_executor_order  by dfs
+std::vector<size_t> DependencyBuilderSimplify::get_new_executor_order() {
   PADDLE_ENFORCE_EQ(
       is_build_,
       true,
@@ -1288,17 +1288,17 @@ std::vector<size_t> DependencyBuilderSimplify::get_new_exexutor_order() {
     is_visit[op_idx] = true;
   }
 
-  std::vector<size_t> dependecy_count(op_num_, 0);
+  std::vector<size_t> dependency_count(op_num_, 0);
   for (auto it : op_downstream_map_) {
     for (auto op_idx : it.second) {
-      dependecy_count[op_idx]++;
+      dependency_count[op_idx]++;
     }
   }
   std::stack<size_t> s;
   std::priority_queue<std::pair<size_t, size_t>> pq;
 
   for (size_t op_idx = op_num_ - 1; op_idx >= start_index_; op_idx--) {
-    if (dependecy_count[op_idx] == 0) {
+    if (dependency_count[op_idx] == 0) {
       pq.push(std::make_pair(op_behind_num[op_idx], op_idx));
     }
   }
@@ -1318,7 +1318,7 @@ std::vector<size_t> DependencyBuilderSimplify::get_new_exexutor_order() {
       for (auto it = op_downstream_map_[current].rbegin();
            it != op_downstream_map_[current].rend();
            it++) {
-        if (--dependecy_count[*it] == 0 && !not_usefull_op.count(current)) {
+        if (--dependency_count[*it] == 0 && !not_usefull_op.count(current)) {
           pq.push(std::make_pair(op_behind_num[*it], *it));
           // s.push(*it);
         }
@@ -1383,8 +1383,8 @@ void DependencyBuilderSimplify::AddDownstreamOp(size_t prior_op_idx,
   std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
   // NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
   // ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
-  // a->c will not be shrinked in the following case: AddDownstreamOp(a, b) ->
-  // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrinked by
+  // a->c will not be shrunk in the following case: AddDownstreamOp(a, b) ->
+  // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrunk by
   // ShrinkDownstreamMap.
   for (size_t op_idx : downstream_ops) {
     if (OpHappensBefore(op_idx, posterior_op_idx)) {

diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h
@@ -27,7 +27,7 @@ class InstructionBase;
 namespace interpreter {
 
 // DependencyBuilder provides some dependency adding function to handle the
-// dependency that cannot be explicitly expresed by a Program. It is a
+// dependency that cannot be explicitly expressed by a Program. It is a
 // compromise of the incomplete expression ability of the Program. Do not add
 // too many functions here at will, that will bring great burden to the
 // Interpretercore.
@@ -146,7 +146,7 @@ class DependencyBuilderSimplify {
         phi::errors::Unavailable("op_happen_before is not yet built"));
     return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
   }
-  std::vector<size_t> get_new_exexutor_order();
+  std::vector<size_t> get_new_executor_order();
 
  private:
   void AddDependencyForCoalesceTensorOp();

diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc
@@ -315,7 +315,7 @@ void Instruction::AddInplace(Variable* in, Variable* out) {
 void Instruction::ClearInplace() { vec_inplace_in_to_out_.clear(); }
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-void Instruction::UpdataRecordStreamForGcInfo() {
+void Instruction::UpdateRecordStreamForGcInfo() {
   if (!IsInterpretercoreFastGCEnabled() ||
       KernelType() != OpFuncType::kGpuAsync) {
     return;

diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -309,7 +309,7 @@ class Instruction {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   bool need_record_stream_for_gc_ = false;
   gpuStream_t stream_{nullptr};
-  void UpdataRecordStreamForGcInfo();
+  void UpdateRecordStreamForGcInfo();
 #endif
 
   bool can_use_infermeta_ctx_ = false;