From e9021b9747d4c8eef81c1060182cb57370c1d433 Mon Sep 17 00:00:00 2001 From: co63oc Date: Sun, 28 Jan 2024 08:04:07 +0800 Subject: [PATCH 1/2] Fix --- .../fluid/framework/downpour_lite_worker.cc | 2 +- paddle/fluid/framework/downpour_worker.cc | 2 +- paddle/fluid/framework/downpour_worker_opt.cc | 2 +- paddle/fluid/framework/fleet/heter_context.h | 4 +- .../fluid/framework/fleet/ps_gpu_wrapper.cc | 4 +- paddle/fluid/framework/hetercpu_worker.cc | 2 +- paddle/fluid/framework/hogwild_worker.cc | 2 +- .../fused_multi_transformer_encoder_pass.cc | 14 +++--- .../ir/multihead_matmul_fuse_pass.cc | 2 +- .../ir/multihead_matmul_roformer_fuse_pass.h | 2 +- .../ir/xpu/fast_where_xpu_fuse_pass.cc | 2 +- paddle/fluid/framework/naive_executor.cc | 2 +- paddle/fluid/framework/naive_executor.h | 2 +- .../new_executor/feed_fetch_utils.cc | 4 +- .../control_flow/if_instruction.cc | 2 +- .../instruction/custom_kernel_instruction.cc | 4 +- .../interpreter/dependency_builder.cc | 34 ++++++------- .../interpreter/dependency_builder.h | 4 +- .../new_executor/new_executor_defs.cc | 2 +- .../new_executor/new_executor_defs.h | 2 +- .../framework/new_executor/pir_interpreter.cc | 46 +++++++++--------- .../framework/new_executor/pir_interpreter.h | 14 +++--- .../new_executor/program_interpreter.cc | 48 +++++++++---------- .../new_executor/program_interpreter.h | 14 +++--- paddle/fluid/framework/parallel_executor.cc | 4 +- paddle/fluid/framework/ps_gpu_worker.cc | 2 +- paddle/fluid/framework/trainer_desc.proto | 2 +- .../fluid/inference/api/analysis_predictor.cc | 4 +- paddle/fluid/jit/property.h | 2 +- python/paddle/base/trainer_desc.py | 2 +- 30 files changed, 116 insertions(+), 116 deletions(-) diff --git a/paddle/fluid/framework/downpour_lite_worker.cc b/paddle/fluid/framework/downpour_lite_worker.cc index c57ef71ae0342b..3d453c018c1d5f 100644 --- a/paddle/fluid/framework/downpour_lite_worker.cc +++ b/paddle/fluid/framework/downpour_lite_worker.cc @@ -117,7 +117,7 @@ void DownpourLiteWorker::Initialize(const TrainerDesc& desc) { << dest_table; copy_dense_tables_.push_back(std::make_pair(src_table, dest_table)); } - for (auto& m : copy_table_config_.table_denpendency_map()) { + for (auto& m : copy_table_config_.table_dependency_map()) { if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) { // currently only support one dependency for (auto& value : m.values()) { diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc index c9bd59f912d7a3..6ce2967a08f1f5 100644 --- a/paddle/fluid/framework/downpour_worker.cc +++ b/paddle/fluid/framework/downpour_worker.cc @@ -116,7 +116,7 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) { << dest_table; copy_dense_tables_.emplace_back(src_table, dest_table); } - for (auto& m : copy_table_config_.table_denpendency_map()) { + for (auto& m : copy_table_config_.table_dependency_map()) { if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) { // currently only support one dependency for (auto& value : m.values()) { diff --git a/paddle/fluid/framework/downpour_worker_opt.cc b/paddle/fluid/framework/downpour_worker_opt.cc index d7d8a7ff883cdd..2e3a83251de157 100644 --- a/paddle/fluid/framework/downpour_worker_opt.cc +++ b/paddle/fluid/framework/downpour_worker_opt.cc @@ -177,7 +177,7 @@ void DownpourWorkerOpt::Initialize(const TrainerDesc& desc) { << dest_table; copy_dense_tables_.emplace_back(src_table, dest_table); } - for (auto& m : copy_table_config_.table_denpendency_map()) { + for (auto& m : 
copy_table_config_.table_dependency_map()) { if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) { // currently only support one dependency for (auto& value : m.values()) { diff --git a/paddle/fluid/framework/fleet/heter_context.h b/paddle/fluid/framework/fleet/heter_context.h index 4c5f03d1bb780a..f7cce0ab44940a 100644 --- a/paddle/fluid/framework/fleet/heter_context.h +++ b/paddle/fluid/framework/fleet/heter_context.h @@ -172,7 +172,7 @@ class HeterContext { } } } else { - VLOG(3) << "Reset gpu task with dynamic mf dimention"; + VLOG(3) << "Reset gpu task with dynamic mf dimension"; for (size_t i = 0; i < feature_dim_keys_.size(); i++) { for (size_t j = 0; j < feature_dim_keys_[i].size(); j++) { feature_dim_keys_[i][j].clear(); @@ -262,7 +262,7 @@ class HeterContext { threads.push_back(std::thread(unique_dynamic_mf_func, i, j)); } } - VLOG(3) << "heter_context unique keys with dynamic mf dimention"; + VLOG(3) << "heter_context unique keys with dynamic mf dimension"; } for (std::thread& t : threads) { t.join(); diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc index b3e48e0d5b63b9..0399c37d22b689 100644 --- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc +++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc @@ -2752,7 +2752,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place, VLOG(3) << "Begin GPUPS PushSparseGrad"; auto buf = memory::Alloc(place, total_length * grad_value_size); - VLOG(3) << "Push Sparse Max mf dimention: " << max_mf_dim_ + VLOG(3) << "Push Sparse Max mf dimension: " << max_mf_dim_ << "grad_value_size:" << grad_value_size; float* total_grad_values_gpu = reinterpret_cast(buf->ptr()); @@ -2790,7 +2790,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place, VLOG(3) << "Begin GPUPS PushSparseGrad"; auto buf = memory::Alloc(place, total_length * grad_value_size); - VLOG(3) << "Push Sparse Max mf dimention: " << max_mf_dim_ + VLOG(3) << "Push Sparse Max mf dimension: " << max_mf_dim_ << "grad_value_size:" << grad_value_size; float* total_grad_values_gpu = reinterpret_cast(buf->ptr()); phi::DenseTensor& total_keys_tensor = keys_tensor[devid_2_index]; diff --git a/paddle/fluid/framework/hetercpu_worker.cc b/paddle/fluid/framework/hetercpu_worker.cc index f741baa0f3d2d8..0959b0ae334424 100644 --- a/paddle/fluid/framework/hetercpu_worker.cc +++ b/paddle/fluid/framework/hetercpu_worker.cc @@ -180,7 +180,7 @@ void HeterCpuWorker::Initialize(const TrainerDesc& desc) { << dest_table; copy_dense_tables_.push_back(std::make_pair(src_table, dest_table)); } - for (auto& m : copy_table_config_.table_denpendency_map()) { + for (auto& m : copy_table_config_.table_dependency_map()) { if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) { // currently only support one dependency for (auto& value : m.values()) { diff --git a/paddle/fluid/framework/hogwild_worker.cc b/paddle/fluid/framework/hogwild_worker.cc index 5d4b32918f05f8..d95007043dfb54 100644 --- a/paddle/fluid/framework/hogwild_worker.cc +++ b/paddle/fluid/framework/hogwild_worker.cc @@ -805,7 +805,7 @@ void HogwildWorker::CreateThreadOperators(const ProgramDesc &program) { // depend_builder.Build(ops_, start_index, sharding_mode_); hbm not safe // should run in debug model need to fix depend_builder.Build(ops_, start_index, false); - new_order = depend_builder.get_new_exexutor_order(); + new_order = depend_builder.get_new_executor_order(); std::vector> new_ops; std::vector final_order; std::vector 
new_op_names; diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc index e8be50b71917c6..b749a1f282c5b3 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc @@ -1805,7 +1805,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, auto* bv_tensor = scope->FindVar(eltadd2_b->Name())->GetMutable(); - // NOTE(minghaoBD): to make it compatible with strucutured pruning on + // NOTE(minghaoBD): to make it compatible with structured pruning on // num_head dimension: // 1. get dim_head from reshape.shape[3], dim_embed from // layer_norm_bias.shape[0] @@ -1952,7 +1952,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, auto ffn1_in_scale = PADDLE_GET_CONST( float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); - // Calc outscale and Set them + // Calc out scale and Set them auto qkv_weight_scale = PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale")); auto out_weight_scale = @@ -2629,7 +2629,7 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( auto* qkv_b_tensor = scope->FindVar(eltadd0_b->Name())->GetMutable(); - // NOTE(minghaoBD): to make it compatible with strucutured pruning on + // NOTE(minghaoBD): to make it compatible with structured pruning on // num_head dimension: // 1. get dim_head from reshape.shape[3], dim_embed from // layer_norm_bias.shape[0] @@ -2758,9 +2758,9 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( auto ffn1_in_scale = PADDLE_GET_CONST( float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); - // Calc outscale and Set them + // Calc out scale and Set them // TODO(wufeisheng): Currently just match layer-wise weight scale, where - // channel-wise weight scale should also be surpported. + // channel-wise weight scale should also be supported. auto qkv_weight_scale = PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale")); auto out_weight_scale = @@ -4267,7 +4267,7 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( auto* qkv_b_tensor = scope->FindVar(eltadd0_b->Name())->GetMutable(); - // NOTE(minghaoBD): to make it compatible with strucutured pruning on + // NOTE(minghaoBD): to make it compatible with structured pruning on // num_head dimension: // 1. 
get dim_head from reshape.shape[3], dim_embed from // layer_norm_bias.shape[0] @@ -4407,7 +4407,7 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( auto ffn1_in_scale = PADDLE_GET_CONST( float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); - // Calc outscale and Set them + // Calc out scale and Set them auto qkv_weight_scale = PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale")); auto out_weight_scale = diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index 25f120c7866b50..ebf273a8d1c2ea 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -452,7 +452,7 @@ PDNode* MultiHeadMatmulPattern::operator()() { } PDNode* MultiHeadMatmulV3Pattern::operator()() { - // Add mul op to support huggingface onnx model convertsion by x2paddle + // Add mul op to support huggingface onnx model conversion by x2paddle std::unordered_set matmul_ops{"mul", "matmul", "matmul_v2"}; auto* input0 = pattern->NewNode(input0_repr()); input0->assert_is_ops_input(matmul_ops); diff --git a/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.h b/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.h index 4d081e7c3ac780..f43206d3c74cd6 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.h +++ b/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.h @@ -24,7 +24,7 @@ namespace framework { namespace ir { namespace patterns { /* - * \brief Fuse the subgraph representing multihead attention part of roformer + * \brief Fuse the subgraph representing multi-head attention part of roformer * into multihead_matmul_roformer op. * * \note The following graph represents this equation: diff --git a/paddle/fluid/framework/ir/xpu/fast_where_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/fast_where_xpu_fuse_pass.cc index c8c36c7134dade..2723105fa0c466 100644 --- a/paddle/fluid/framework/ir/xpu/fast_where_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/fast_where_xpu_fuse_pass.cc @@ -438,7 +438,7 @@ CascadeFastWhereXPUPattern::CascadeFastWhereXPUPattern( pattern->NewNode(fast_where_xpu0_repr())->assert_is_op("fast_where_xpu"); auto fast_where_xpu1 = pattern->NewNode(fast_where_xpu1_repr())->assert_is_op("fast_where_xpu"); - // declare vairable nodes + // declare variable nodes auto condition0 = pattern->NewNode(condition0_repr()) ->assert_is_op_input("fast_where_xpu", "condition"); auto condition1 = pattern->NewNode(condition1_repr()) diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index 90f5b93dcb2efa..5dae6c1c845148 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -320,7 +320,7 @@ void NaiveExecutor::ResetTrtOps(int num) { #endif } -void NaiveExecutor::CloneLiteEnigne(int num, void *stream) { +void NaiveExecutor::CloneLiteEngine(int num, void *stream) { #ifdef PADDLE_WITH_LITE for (auto &op : ops_) { if (op->Type() == "lite_engine") { diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index 8388bfe3a37fc1..1f56805a870209 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -90,7 +90,7 @@ class NaiveExecutor { void ResetTrtOps(int num); - void CloneLiteEnigne(int num, void* stream); + void CloneLiteEngine(int num, void* stream); void RegisterOutputHook(const HookFunc& hookfunc); void 
RegisterInputHook(const HookFunc& hookfunc); diff --git a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc index 0a713b89727f61..99829de387c321 100644 --- a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc +++ b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc @@ -77,7 +77,7 @@ void SplitFeedTensors(const std::vector& feed_names, 0, phi::errors::InvalidArgument( "Split expects feed data (%s)'s dim[0] (%d) is " - "diviable by micro_batch_num (%d).", + "divisible by micro_batch_num (%d).", feed_names[i], numel_size, micro_batch_num)); @@ -211,7 +211,7 @@ void MergeTensors(const std::vector& tensors, tensor_dims[j], new_dim[j], phi::errors::InvalidArgument( - "DenseTensor.ddim[%d] should eaqual to %d, but is %d", + "DenseTensor.ddim[%d] should equal to %d, but is %d", j, new_dim[j], tensor_dims[j])); diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc index d7ad210102b94b..523842438d3555 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc @@ -251,7 +251,7 @@ void IfInstruction::Run() { false_branch_inter_->Run({}, false); CopyBranchOutput(false_branch_outputs_, false_branch_inter_); } - // copy ouptut + // copy output } } // namespace framework diff --git a/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc index ceee6c7d91739e..720fb521439fc5 100644 --- a/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc @@ -89,7 +89,7 @@ void CustomKernelInstruction::BuildCustomContext( input_ptrs_.emplace_back(nullptr); custom_kernel_ctx_.EmplaceBackInput(std::move(paddle::Tensor())); } - VLOG(8) << "ctx->EmplaceBackInput : an optioanl input " << t; + VLOG(8) << "ctx->EmplaceBackInput : an optional input " << t; continue; } auto in_var_name = value_exec_info_.GetVarName(ptr); @@ -285,7 +285,7 @@ void CustomKernelInstruction::BuildCustomContext( cache_out_ptrs_.emplace_back(nullptr); custom_kernel_ctx_.EmplaceBackOutput(std::move(paddle::Tensor())); - VLOG(8) << "ctx->EmplaceBackOutput : an optioanl output"; + VLOG(8) << "ctx->EmplaceBackOutput : an optional output"; continue; } diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc index a788d7d898b1b7..237e8baa95daff 100644 --- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc +++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc @@ -32,7 +32,7 @@ PADDLE_DEFINE_EXPORTED_bool( // The difference between "sequential_run" and "serial_run": // "sequential_run" dispatches OPs one by one according to the sequence in the -// Program, while "serial_run" ensures that all Ops are scheduled in a singal +// Program, while "serial_run" ensures that all Ops are scheduled in a signal // thread. In standalone executor, "sequential_run" is also "serial_run", while // "serial_run" is not necessarily "sequential_run". 
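To make the distinction concrete, a toy scheduler (made-up names, not Paddle's executor) that honors the two modes might look like this: "sequential_run" pins dispatch to program order, while "serial_run" only promises a single scheduling thread and may still follow a dependency-driven order.

#include <cstddef>
#include <functional>
#include <vector>

struct ToyOp {
  std::function<void()> run;
};

void RunOnce(const std::vector<ToyOp>& ops_in_program_order,
             const std::vector<size_t>& dependency_driven_order,
             bool sequential_run) {
  if (sequential_run) {
    // Dispatch strictly in the order the ops appear in the Program.
    for (const auto& op : ops_in_program_order) op.run();
  } else {
    // Still one thread ("serial"), but the scheduler chooses the order.
    for (size_t idx : dependency_driven_order) ops_in_program_order[idx].run();
  }
}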
PADDLE_DEFINE_EXPORTED_bool(new_executor_sequential_run, @@ -206,13 +206,13 @@ void DependencyBuilder::AddDependencyForCoalesceTensorOp() { } // find first op read 'outputs' between (first_read_fused_out_op, end) - // add depned: first_read_fused_out_op -> first op that reads 'outputs' + // add depend: first_read_fused_out_op -> first op that reads 'outputs' // special case for consecutive communication ops, for example, // FusedOutput = c_sync_calc_stream(FusedOutput) // FusedOutput= c_allreduce_sum(FusedOutput) // FusedOutput = c_sync_comm_stream(FusedOutput) - // we should take the last one to add depned instead of + // we should take the last one to add depend instead of // 'first_read_fused_out_op' size_t target = first_read_fused_out_op; for (size_t j = first_read_fused_out_op + 1; j < op_num_; ++j) { @@ -355,8 +355,8 @@ void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx, std::set& downstream_ops = (*op_downstream_map_)[prior_op_idx]; // NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore // ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example, - // a->c will not be shrinked in the following case: AddDownstreamOp(a, b) -> - // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrinked by + // a->c will not be shrunk in the following case: AddDownstreamOp(a, b) -> + // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrunk by // ShrinkDownstreamMap. for (size_t op_idx : downstream_ops) { if (OpHappensBefore(op_idx, posterior_op_idx)) { @@ -531,7 +531,7 @@ void DependencyBuilder::ShrinkDownstreamMap() { } } // NOTE(Ruibiao): op_happens_before will not be changed when shrink - // dowstream map + // downstream map (*op_downstream_map_)[i] = minumum_nexts; } VLOG(8) << "Finish shrink downstream map"; @@ -963,7 +963,7 @@ void DependencyBuilderSimplify::ShrinkDownstreamMap() { } } // NOTE(Ruibiao): op_happens_before will not be changed when shrink - // dowstream map + // downstream map op_downstream_map_.at(i) = minumum_nexts; } VLOG(8) << "Finish shrink downstream map"; @@ -1031,13 +1031,13 @@ void DependencyBuilderSimplify::AddDependencyForCoalesceTensorOp() { } // find first op read 'outputs' between (first_read_fused_out_op, end) - // add depned: first_read_fused_out_op -> first op that reads 'outputs' + // add depend: first_read_fused_out_op -> first op that reads 'outputs' // special case for consecutive communication ops, for example, // FusedOutput = c_sync_calc_stream(FusedOutput) // FusedOutput= c_allreduce_sum(FusedOutput) // FusedOutput = c_sync_comm_stream(FusedOutput) - // we should take the last one to add depned instead of + // we should take the last one to add depend instead of // 'first_read_fused_out_op' size_t target = first_read_fused_out_op; for (size_t j = first_read_fused_out_op + 1; j < op_num_; ++j) { @@ -1236,8 +1236,8 @@ void DependencyBuilderSimplify::SetSameStream() { } } -// get_new_exector_order by dfs -std::vector DependencyBuilderSimplify::get_new_exexutor_order() { +// get_new_executor_order by dfs +std::vector DependencyBuilderSimplify::get_new_executor_order() { PADDLE_ENFORCE_EQ( is_build_, true, @@ -1288,17 +1288,17 @@ std::vector DependencyBuilderSimplify::get_new_exexutor_order() { is_visit[op_idx] = true; } - std::vector dependecy_count(op_num_, 0); + std::vector dependency_count(op_num_, 0); for (auto it : op_downstream_map_) { for (auto op_idx : it.second) { - dependecy_count[op_idx]++; + dependency_count[op_idx]++; } } std::stack s; std::priority_queue> pq; for (size_t 
op_idx = op_num_ - 1; op_idx >= start_index_; op_idx--) { - if (dependecy_count[op_idx] == 0) { + if (dependency_count[op_idx] == 0) { pq.push(std::make_pair(op_behind_num[op_idx], op_idx)); } } @@ -1318,7 +1318,7 @@ std::vector DependencyBuilderSimplify::get_new_exexutor_order() { for (auto it = op_downstream_map_[current].rbegin(); it != op_downstream_map_[current].rend(); it++) { - if (--dependecy_count[*it] == 0 && !not_usefull_op.count(current)) { + if (--dependency_count[*it] == 0 && !not_usefull_op.count(current)) { pq.push(std::make_pair(op_behind_num[*it], *it)); // s.push(*it); } @@ -1383,8 +1383,8 @@ void DependencyBuilderSimplify::AddDownstreamOp(size_t prior_op_idx, std::set& downstream_ops = op_downstream_map_[prior_op_idx]; // NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore // ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example, - // a->c will not be shrinked in the following case: AddDownstreamOp(a, b) -> - // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrinked by + // a->c will not be shrunk in the following case: AddDownstreamOp(a, b) -> + // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrunk by // ShrinkDownstreamMap. for (size_t op_idx : downstream_ops) { if (OpHappensBefore(op_idx, posterior_op_idx)) { diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h index 5670a8ea043476..bec13301ae996f 100644 --- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h +++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h @@ -27,7 +27,7 @@ class InstructionBase; namespace interpreter { // DependencyBuilder provides some dependency adding function to handle the -// dependency that cannot be explicitly expresed by a Program. It is a +// dependency that cannot be explicitly expressed by a Program. It is a // compromise of the incomplete expression ability of the Program. Do not add // too many functions here at will, that will bring great burden to the // Interpretercore. 
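The shrinking mentioned in the comments above can be summarized with a rough, self-contained sketch (assumed types, not the real DependencyBuilder): a direct edge to `item` is dropped when another downstream op already happens before `item`, so only the minimal set of "next" ops is kept and op_happens_before itself is left untouched.

#include <cstddef>
#include <set>
#include <vector>

std::set<size_t> ShrinkNexts(const std::set<size_t>& nexts,
                             const std::vector<std::vector<bool>>& happens_before) {
  std::set<size_t> minimum_nexts;
  for (size_t item : nexts) {
    bool implied_by_other_next = false;
    for (size_t other : nexts) {
      if (other != item && happens_before[other][item]) {
        implied_by_other_next = true;  // other -> ... -> item already orders them
        break;
      }
    }
    if (!implied_by_other_next) minimum_nexts.insert(item);
  }
  return minimum_nexts;
}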
@@ -146,7 +146,7 @@ class DependencyBuilderSimplify { phi::errors::Unavailable("op_happen_before is not yet built")); return op_happens_before_.at(prior_op_idx).at(posterior_op_idx); } - std::vector get_new_exexutor_order(); + std::vector get_new_executor_order(); private: void AddDependencyForCoalesceTensorOp(); diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc index a336e2c377dfd1..9ad65274846a6b 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.cc +++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc @@ -315,7 +315,7 @@ void Instruction::AddInplace(Variable* in, Variable* out) { void Instruction::ClearInplace() { vec_inplace_in_to_out_.clear(); } #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -void Instruction::UpdataRecordStreamForGcInfo() { +void Instruction::UpdateRecordStreamForGcInfo() { if (!IsInterpretercoreFastGCEnabled() || KernelType() != OpFuncType::kGpuAsync) { return; diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h index 66773746deb274..ad74b5fc60746d 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.h +++ b/paddle/fluid/framework/new_executor/new_executor_defs.h @@ -309,7 +309,7 @@ class Instruction { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) bool need_record_stream_for_gc_ = false; gpuStream_t stream_{nullptr}; - void UpdataRecordStreamForGcInfo(); + void UpdateRecordStreamForGcInfo(); #endif bool can_use_infermeta_ctx_ = false; diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc index 932f3c32830842..fcb190a7999223 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.cc +++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc @@ -107,7 +107,7 @@ PirInterpreter::PirInterpreter(const platform::Place& place, exception_notifier_ = main_thread_blocker_.RegisterEvent(kExceptionCaught); completion_notifier_ = main_thread_blocker_.RegisterEvent(kTaskCompletion); - dependecy_count_ = std::make_shared>(); + dependency_count_ = std::make_shared>(); if (!FLAGS_new_executor_use_local_scope) { execution_config_.create_local_scope = false; @@ -170,7 +170,7 @@ PirInterpreter::PirInterpreter( exception_notifier_ = main_thread_blocker_.RegisterEvent(kExceptionCaught); completion_notifier_ = main_thread_blocker_.RegisterEvent(kTaskCompletion); - dependecy_count_ = std::make_shared>(); + dependency_count_ = std::make_shared>(); if (!FLAGS_new_executor_use_local_scope) { execution_config_.create_local_scope = false; @@ -207,7 +207,7 @@ PirInterpreter::PirInterpreter( } PirInterpreter::~PirInterpreter() { - // cancle gc's thread + // cancel gc's thread gc_.reset(nullptr); async_work_queue_.reset(); VLOG(4) << "~PirInterpreter(): " << this << " on " << place_; @@ -289,7 +289,7 @@ void PirInterpreter::ShareBuildResultsFrom(const InterpreterBaseImpl& src) { } // share op dependency ir_dependency_builder_.ShareDependencyFrom(impl.GetPirDependencyBuilder()); - dependecy_count_ = impl.GetDependencyCount(); + dependency_count_ = impl.GetDependencyCount(); // share event analysis ir_stream_analyzer_.ShareEventInfoFrom(impl.GetPirStreamAnalyzer()); is_shared_results_build_ = true; @@ -313,7 +313,7 @@ PirInterpreter::GetPirDependencyBuilder() const { std::shared_ptr> PirInterpreter::GetDependencyCount() const { - return dependecy_count_; + return dependency_count_; } const 
interpreter::PirStreamAnalyzer& PirInterpreter::GetPirStreamAnalyzer() @@ -629,8 +629,8 @@ void PirInterpreter::AnalyseExecuteOrderForTrace( if (VLOG_IS_ON(2)) { ss << "\nLeaf nodes: "; } - for (size_t instr_id = 0; instr_id < dependecy_count_->size(); ++instr_id) { - if ((*dependecy_count_)[instr_id] == 0) { + for (size_t instr_id = 0; instr_id < dependency_count_->size(); ++instr_id) { + if ((*dependency_count_)[instr_id] == 0) { ready_ops.push(instr_id); if (VLOG_IS_ON(2)) { ss << instr_id << "[" << vec_instruction_base_[instr_id]->Name() @@ -663,9 +663,9 @@ void PirInterpreter::AnalyseExecuteOrderForTrace( PADDLE_ENFORCE_EQ( trace_order.size(), - dependecy_count_->size(), + dependency_count_->size(), platform::errors::PreconditionNotMet( - "trace_order size should be equal to dependecy_count_.")); + "trace_order size should be equal to dependency_count_.")); trace_execute_order_ = trace_order; @@ -893,11 +893,11 @@ std::vector PirInterpreter::DebugInfo() { void PirInterpreter::BuildInstructionDependences() { // analysis the dependences between instructions, add next_instr_list to each - // instr, and set the dependecy_count_ + // instr, and set the dependency_count_ size_t instr_num = vec_instruction_base_.size(); - dependecy_count_ = GetDependencyCount(); + dependency_count_ = GetDependencyCount(); if (!is_shared_results_build_) { - dependecy_count_->assign(instr_num, 0); + dependency_count_->assign(instr_num, 0); } std::vector instructions_ptr; for (auto& instr : vec_instruction_base_) { @@ -940,7 +940,7 @@ void PirInterpreter::BuildInstructionDependences() { if (!is_shared_results_build_) { for (size_t next_instr_id : next_instr_ids) { - ++(*dependecy_count_)[next_instr_id]; + ++(*dependency_count_)[next_instr_id]; } } } @@ -1013,7 +1013,7 @@ void PirInterpreter::RecordStreamForGC(InstructionBase* instr) { memory::RecordStream(allocation, stream); } else if (platform::is_cuda_pinned_place(place)) { // TODO(Ruibiao): Here should do something to make sure that the tensor - // is not freed until the H2D copies done. However, simplely launch a + // is not freed until the H2D copies done. However, simply launch a // CUDA runtime callback to the H2D stream may lead a high performance // overhead. As all the cases we meet in H2D are copies from CPUPlace at // present, we just log a WARNING here. A better design is required. @@ -1037,7 +1037,7 @@ void PirInterpreter::RecordStreamForGC(InstructionBase* instr) { * async CUDA kernel. * * Here we only process the first condition, because: - * 1. Since the RecordStream function will directly return when the recored + * 1. Since the RecordStream function will directly return when the recorded * stream is equal to the owning stream, recording a stream same as which * initialized this tensor has less time overhead. 
Conversely, it may take * more time if we try to extract those cross-stream input vars from @@ -1235,7 +1235,7 @@ void PirInterpreter::CalculateLastLiveOps() { } VLOG(4) << "shrink the last_live_ops list for all vars in skip_gc_vars"; - for (auto& dep : *dependecy_count_) { + for (auto& dep : *dependency_count_) { deps_.emplace_back(std::make_shared(dep)); } for (size_t i = 0; i < value_exe_info_->GetVarList().size(); ++i) { @@ -1246,8 +1246,8 @@ void PirInterpreter::CalculateLastLiveOps() { } void PirInterpreter::ConstructEventForJitInput() { - for (size_t i = 0; i < dependecy_count_->size(); ++i) { - if ((*dependecy_count_)[i] == 0) { + for (size_t i = 0; i < dependency_count_->size(); ++i) { + if ((*dependency_count_)[i] == 0) { InstructionBase* inst = vec_instruction_base_[i].get(); if (inst->Name() == "pd_op.memcpy_d2h" && platform::is_gpu_place(place_)) { @@ -1514,8 +1514,8 @@ void PirInterpreter::TraceRunInstructionList( } } - for (size_t i = 0; i < dependecy_count_->size(); ++i) { - if ((*dependecy_count_)[i] == 0) { + for (size_t i = 0; i < dependency_count_->size(); ++i) { + if ((*dependency_count_)[i] == 0) { // NOTE(zhiqiu): hot fix for jit input var RecordMemcpyD2H(vec_instr.at(i).get()); } @@ -1571,8 +1571,8 @@ void PirInterpreter::MultiThreadRunInstructionList( } } - for (size_t i = 0; i < dependecy_count_->size(); ++i) { - if ((*dependecy_count_)[i] == 0) { + for (size_t i = 0; i < dependency_count_->size(); ++i) { + if ((*dependency_count_)[i] == 0) { // NOTE(zhiqiu): hot fix for jit input var RecordMemcpyD2H(vec_instr.at(i).get()); if (FLAGS_new_executor_serial_run) { @@ -1794,7 +1794,7 @@ void PirInterpreter::PreAnalysis() { BuildInstructionDependences(); VLOG(4) << "Done BuildInstructionDependences"; - ir_stream_analyzer_.SetForceEventsToWaitInfo(force_evnets_to_wait_); + ir_stream_analyzer_.SetForceEventsToWaitInfo(force_events_to_wait_); ir_stream_analyzer_.ConstructEvents(vec_instruction_base_); VLOG(4) << "Done ConstructEvents"; diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.h b/paddle/fluid/framework/new_executor/pir_interpreter.h index be959ca723163e..ee1bd662820c99 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.h +++ b/paddle/fluid/framework/new_executor/pir_interpreter.h @@ -111,13 +111,13 @@ class PirInterpreter : public InterpreterBaseImpl { std::unordered_map>* GetForceEventsToWaitInfo() { - return force_evnets_to_wait_; + return force_events_to_wait_; } void SetForceEventsToWaitInfo( std::unordered_map>* - force_evnets_to_wait) { - force_evnets_to_wait_ = force_evnets_to_wait; + force_events_to_wait) { + force_events_to_wait_ = force_events_to_wait; } private: @@ -168,7 +168,7 @@ class PirInterpreter : public InterpreterBaseImpl { ExecutionConfig execution_config_; std::unordered_map>* - force_evnets_to_wait_; + force_events_to_wait_; VariableScope var_scope_; Scope* scope_{nullptr}; @@ -187,9 +187,9 @@ class PirInterpreter : public InterpreterBaseImpl { // var std::map> last_live_ops_; - // (*dependecy_count_)[i] contains the number of dependencies that the i-th op - // need to wait - std::shared_ptr> dependecy_count_; + // (*dependency_count_)[i] contains the number of dependencies that the i-th + // op need to wait + std::shared_ptr> dependency_count_; std::vector> deps_; std::vector> refs_; diff --git a/paddle/fluid/framework/new_executor/program_interpreter.cc b/paddle/fluid/framework/new_executor/program_interpreter.cc index 0f50665e1621e8..e6e3060cda94a5 100644 --- 
a/paddle/fluid/framework/new_executor/program_interpreter.cc +++ b/paddle/fluid/framework/new_executor/program_interpreter.cc @@ -63,7 +63,7 @@ ProgramInterpreter::ProgramInterpreter(const platform::Place& place, exception_notifier_ = main_thread_blocker_.RegisterEvent(kExceptionCaught); completion_notifier_ = main_thread_blocker_.RegisterEvent(kTaskCompletion); - dependecy_count_ = std::make_shared>(); + dependency_count_ = std::make_shared>(); if (!FLAGS_new_executor_use_local_scope) { execution_config_.create_local_scope = false; @@ -100,7 +100,7 @@ ProgramInterpreter::ProgramInterpreter(const platform::Place& place, } ProgramInterpreter::~ProgramInterpreter() { - // cancle gc's thread + // cancel gc's thread gc_.reset(nullptr); async_work_queue_.reset(); VLOG(4) << "~ProgramInterpreter(): " << this << " on " << place_; @@ -355,7 +355,7 @@ void ProgramInterpreter::ShareBuildResultsFrom(const InterpreterBaseImpl& src) { } // share op dependency dependency_builder_.ShareDependencyFrom(impl.GetDependencyBuilder()); - dependecy_count_ = impl.GetDependencyCount(); + dependency_count_ = impl.GetDependencyCount(); // share event analysis stream_analyzer_.ShareEventInfoFrom(impl.GetStreamAnalyzer()); is_shared_results_build_ = true; @@ -399,7 +399,7 @@ const interpreter::DependencyBuilder& ProgramInterpreter::GetDependencyBuilder() std::shared_ptr> ProgramInterpreter::GetDependencyCount() const { - return dependecy_count_; + return dependency_count_; } const interpreter::StreamAnalyzer& ProgramInterpreter::GetStreamAnalyzer() @@ -452,7 +452,7 @@ void ProgramInterpreter::BuildAndCacheInstructionCtx(Instruction* instr_node) { void ProgramInterpreter::BuildInplace() { // NOTE(Ruibiao): coalesce_tensor_op outputs a FusedOutput phi::DenseTensor // and a list of Output Tensors which are sliced from the FusedOutput. These - // outputs sholud not be the outvar of the in-place var-pair since memory + // outputs should not be the outvar of the in-place var-pair since memory // reuse between FusedOutput and Output Tensors is assumed. For the following // example: // fused_var, var1, var2, var3 = coalesce_tensor(var1, var2, var3) @@ -603,11 +603,11 @@ void ProgramInterpreter::CheckCUDAGraphBeforeRun( void ProgramInterpreter::BuildOperatorDependences() { // analysis the dependences between ops, add next_instr_list to each instr, - // and set the dependecy_count_ + // and set the dependency_count_ size_t instr_num = vec_instruction_.size(); - dependecy_count_ = GetDependencyCount(); + dependency_count_ = GetDependencyCount(); if (!is_shared_results_build_) { - dependecy_count_->assign(instr_num, 0); + dependency_count_->assign(instr_num, 0); } auto downstream_map = dependency_builder_.Build(vec_instruction_); @@ -647,7 +647,7 @@ void ProgramInterpreter::BuildOperatorDependences() { if (!is_shared_results_build_) { for (size_t next_instr_id : next_instr_ids) { - ++(*dependecy_count_)[next_instr_id]; + ++(*dependency_count_)[next_instr_id]; } } } @@ -692,8 +692,8 @@ void ProgramInterpreter::Convert( // add event for the input var of jit program, since there are async copied // from gpu_pinned place to gpu place on compute stream. 
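The dependency_count_ renamed throughout these interpreter hunks is the per-instruction wait counter. A minimal sketch with toy types (not the real interpreter) of how such counters drive dispatch: instructions whose counter starts at zero are the "leaf" instructions the loops below look for, and each completed instruction decrements its downstream counters and releases the ones that reach zero.

#include <atomic>
#include <cstddef>
#include <deque>
#include <vector>

struct ToyInstr {
  std::vector<size_t> next_instr_ids;  // downstream instruction ids
};

std::vector<size_t> DispatchByDependencyCount(
    const std::vector<ToyInstr>& instrs,
    std::vector<std::atomic<size_t>>& dependency_count) {
  std::deque<size_t> ready;
  for (size_t i = 0; i < instrs.size(); ++i) {
    if (dependency_count[i].load() == 0) ready.push_back(i);  // leaf instructions
  }
  std::vector<size_t> executed_order;
  while (!ready.empty()) {
    size_t cur = ready.front();
    ready.pop_front();
    executed_order.push_back(cur);  // "run" instruction cur here
    for (size_t next : instrs[cur].next_instr_ids) {
      // fetch_sub returns the previous value; 1 means the last dependency is done
      if (dependency_count[next].fetch_sub(1) == 1) ready.push_back(next);
    }
  }
  return executed_order;
}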
- for (size_t i = 0; i < dependecy_count_->size(); ++i) { - if ((*dependecy_count_)[i] == 0) { + for (size_t i = 0; i < dependency_count_->size(); ++i) { + if ((*dependency_count_)[i] == 0) { auto& inst = vec_instruction_[i]; if (inst.OpBase()->Type() == interpreter::kMemcpyD2H && platform::is_gpu_place(place_)) { @@ -840,7 +840,7 @@ void ProgramInterpreter::Convert( BuildInplace(); } - for (auto& dep : *dependecy_count_) { + for (auto& dep : *dependency_count_) { deps_.emplace_back(std::make_shared(dep)); } for (size_t i = 0; i < vec_meta_info.size(); ++i) { @@ -860,7 +860,7 @@ void ProgramInterpreter::BuildOpFuncNode( vec_instruction_.reserve(op_nums); for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { auto& op_func_node = nodes[op_idx]; - stream_analyzer_.SetForceEventsToWaitInfo(force_evnets_to_wait_); + stream_analyzer_.SetForceEventsToWaitInfo(force_events_to_wait_); auto* dev_ctx_ = stream_analyzer_.ParseDeviceContext(op_func_node); #ifdef PADDLE_WITH_CUDA if (FLAGS_new_executor_use_cuda_graph) { @@ -887,7 +887,7 @@ void ProgramInterpreter::BuildOpFuncNode( vec_instruction_.emplace_back(op_idx, std::move(op_func_node), *dev_ctx_); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - vec_instruction_.back().UpdataRecordStreamForGcInfo(); + vec_instruction_.back().UpdateRecordStreamForGcInfo(); #endif } } @@ -1245,8 +1245,8 @@ void ProgramInterpreter::ExecuteInstructionList( } } - for (size_t i = 0; i < dependecy_count_->size(); ++i) { - if ((*dependecy_count_)[i] == 0) { + for (size_t i = 0; i < dependency_count_->size(); ++i) { + if ((*dependency_count_)[i] == 0) { // NOTE(zhiqiu): hot fix for jit input var RecordMemcpyD2H(vec_instr.at(i)); if (FLAGS_new_executor_serial_run) { @@ -1396,7 +1396,7 @@ void ProgramInterpreter::RecordStreamForGC(const Instruction& instr) { memory::RecordStream(allocation, stream); } else if (platform::is_cuda_pinned_place(place)) { // TODO(Ruibiao): Here should do something to make sure that the tensor - // is not freed until the H2D copies done. However, simplely launch a + // is not freed until the H2D copies done. However, simply launch a // CUDA runtime callback to the H2D stream may lead a high performance // overhead. As all the cases we meet in H2D are copies from CPUPlace at // present, we just log a WARNING here. A better design is required. @@ -1420,7 +1420,7 @@ void ProgramInterpreter::RecordStreamForGC(const Instruction& instr) { * async CUDA kernel. * * Here we only process the first condition, because: - * 1. Since the RecordStream function will directly return when the recored + * 1. Since the RecordStream function will directly return when the recorded * stream is equal to the owning stream, recording a stream same as which * initialized this tensor has less time overhead. 
Conversely, it may take * more time if we try to extract those cross-stream input vars from @@ -1609,8 +1609,8 @@ void ProgramInterpreter::TraceInstructionList( exception_holder_.Clear(); - for (size_t i = 0; i < dependecy_count_->size(); ++i) { - if ((*dependecy_count_)[i] == 0) { + for (size_t i = 0; i < dependency_count_->size(); ++i) { + if ((*dependency_count_)[i] == 0) { // NOTE(zhiqiu): hot fix for jit input var RecordMemcpyD2H(vec_instr.at(i)); } @@ -1687,8 +1687,8 @@ void ProgramInterpreter::AnalyseExecuteOrderForTrace() { std::vector trace_order; SchedulingQueue ready_ops(instruction_scheduling_priority_less); - for (size_t instr_id = 0; instr_id < dependecy_count_->size(); ++instr_id) { - if ((*dependecy_count_)[instr_id] == 0) { + for (size_t instr_id = 0; instr_id < dependency_count_->size(); ++instr_id) { + if ((*dependency_count_)[instr_id] == 0) { ready_ops.push(instr_id); } } @@ -1709,9 +1709,9 @@ void ProgramInterpreter::AnalyseExecuteOrderForTrace() { PADDLE_ENFORCE_EQ( trace_order.size(), - dependecy_count_->size(), + dependency_count_->size(), platform::errors::PreconditionNotMet( - "trace_order size should be equal to dependecy_count_.")); + "trace_order size should be equal to dependency_count_.")); trace_execute_order_ = trace_order; diff --git a/paddle/fluid/framework/new_executor/program_interpreter.h b/paddle/fluid/framework/new_executor/program_interpreter.h index 5359c41fddcdc6..7e956249e22a38 100644 --- a/paddle/fluid/framework/new_executor/program_interpreter.h +++ b/paddle/fluid/framework/new_executor/program_interpreter.h @@ -103,13 +103,13 @@ class ProgramInterpreter : public InterpreterBaseImpl { std::unordered_map>* GetForceEventsToWaitInfo() { - return force_evnets_to_wait_; + return force_events_to_wait_; } void SetForceEventsToWaitInfo( std::unordered_map>* - force_evnets_to_wait) { - force_evnets_to_wait_ = force_evnets_to_wait; + force_events_to_wait) { + force_events_to_wait_ = force_events_to_wait; } bool IsStaticBuild() const override { return static_build_; } @@ -205,7 +205,7 @@ class ProgramInterpreter : public InterpreterBaseImpl { ExecutionConfig execution_config_; std::unordered_map>* - force_evnets_to_wait_; + force_events_to_wait_; VariableScope var_scope_; Scope* local_scope_{nullptr}; // not owned @@ -223,9 +223,9 @@ class ProgramInterpreter : public InterpreterBaseImpl { // var std::map> last_live_ops_; - // (*dependecy_count_)[i] contains the number of dependencies that the i-th op - // need to wait - std::shared_ptr> dependecy_count_; + // (*dependency_count_)[i] contains the number of dependencies that the i-th + // op need to wait + std::shared_ptr> dependency_count_; std::vector> deps_; std::vector> refs_; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 491fda2e9d59a3..6bc29d918d124e 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -59,7 +59,7 @@ PADDLE_DEFINE_EXPORTED_string( pe_profile_fname, "", "Profiler filename for PE, which generated by gperftools." - "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable."); + "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable."); namespace paddle { namespace framework { @@ -124,7 +124,7 @@ class ParallelExecutorPrivate { * NOTE(zengjinle): the fed variables of users should not be reused, * because users may feed them into another network. 
Changing the fed * variables that users can visit may cause calculation wrong, which is - * a very subtle bug when traning networks. However, these variables + * a very subtle bug when training networks. However, these variables * can be garbage collected. * * ParallelExecutor provides 2 methods to feed variables: diff --git a/paddle/fluid/framework/ps_gpu_worker.cc b/paddle/fluid/framework/ps_gpu_worker.cc index 7daab881bea464..4cc03b95abc525 100644 --- a/paddle/fluid/framework/ps_gpu_worker.cc +++ b/paddle/fluid/framework/ps_gpu_worker.cc @@ -185,7 +185,7 @@ void PSGPUWorker::Initialize(const TrainerDesc& desc) { << dest_table; copy_dense_tables_.push_back(std::make_pair(src_table, dest_table)); } - for (auto& m : copy_table_config_.table_denpendency_map()) { + for (auto& m : copy_table_config_.table_dependency_map()) { if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) { // currently only support one dependency for (auto& value : m.values()) { diff --git a/paddle/fluid/framework/trainer_desc.proto b/paddle/fluid/framework/trainer_desc.proto index e3abbd210f25a4..218ec423b6baf9 100644 --- a/paddle/fluid/framework/trainer_desc.proto +++ b/paddle/fluid/framework/trainer_desc.proto @@ -172,7 +172,7 @@ message CopyTableConfig { optional bool sparse_copy_by_feasign = 10 [ default = true ]; // table dependency for pull/push optional bool enable_dependency = 11 [ default = false ]; - repeated TableDependencyMap table_denpendency_map = 12; + repeated TableDependencyMap table_dependency_map = 12; } message CondTableMap { diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 28bb03e52c8e74..ba3577694c55ac 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2875,10 +2875,10 @@ std::unique_ptr AnalysisPredictor::Clone(void *stream) { #endif #ifdef PADDLE_WITH_LITE #ifdef LITE_SUBGRAPH_WITH_XPU - x->executor_->CloneLiteEnigne(++AnalysisPredictor::clone_num_, + x->executor_->CloneLiteEngine(++AnalysisPredictor::clone_num_, config_.xpu_config_.stream); #else - x->executor_->CloneLiteEnigne(++AnalysisPredictor::clone_num_, nullptr); + x->executor_->CloneLiteEngine(++AnalysisPredictor::clone_num_, nullptr); #endif #endif return std::unique_ptr(x); diff --git a/paddle/fluid/jit/property.h b/paddle/fluid/jit/property.h index 4a3fae89f6a9dc..6b9889818251cd 100644 --- a/paddle/fluid/jit/property.h +++ b/paddle/fluid/jit/property.h @@ -109,7 +109,7 @@ class Property { // Note: the id_ is unique for all Property (only for auto parallel). uint64_t id_ = GenerateId(); - // Note: the orignal_id_ is used for referring to the original Property + // Note: the original_id_ is used for referring to the original Property // that the current Property is built from (only for auto parallel). // The default original_id_ is same as the id_, which means the // current Property is not built from the other one. 
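The proto field rename above has to reach every consumer, including the Python descriptor below, because the generated accessor follows the field name. A hedged C++ sketch of the usage pattern the workers share (BuildTableDependency is a made-up helper; the loop mirrors the worker code in this patch):

#include <cstdint>
#include <map>
#include <vector>

#include "paddle/fluid/framework/trainer_desc.pb.h"  // generated from trainer_desc.proto

std::map<uint64_t, std::vector<uint64_t>> BuildTableDependency(
    const paddle::framework::CopyTableConfig& config) {
  std::map<uint64_t, std::vector<uint64_t>> table_dependency;
  for (const auto& m : config.table_dependency_map()) {
    for (const auto& value : m.values()) {
      table_dependency[m.key()].push_back(value);  // table m.key() depends on value
    }
  }
  return table_dependency;
}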
diff --git a/python/paddle/base/trainer_desc.py b/python/paddle/base/trainer_desc.py index 65436105d05930..69a5ef4b4433d9 100644 --- a/python/paddle/base/trainer_desc.py +++ b/python/paddle/base/trainer_desc.py @@ -286,7 +286,7 @@ def _set_copy_table_config(self, config_dict): dependency_map = config_dict.get("dependency_map", {}) for key in dependency_map: - m = config.table_denpendency_map.add() + m = config.table_dependency_map.add() m.key = key values = dependency_map[key] if not isinstance(values, list): From 39177b2f6279f03fb1b2765368515ae59fb12129 Mon Sep 17 00:00:00 2001 From: co63oc Date: Sun, 28 Jan 2024 12:41:49 +0800 Subject: [PATCH 2/2] ci