Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion paddle/fluid/framework/downpour_lite_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ void DownpourLiteWorker::Initialize(const TrainerDesc& desc) {
<< dest_table;
copy_dense_tables_.push_back(std::make_pair(src_table, dest_table));
}
for (auto& m : copy_table_config_.table_denpendency_map()) {
for (auto& m : copy_table_config_.table_dependency_map()) {
if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
// currently only support one dependency
for (auto& value : m.values()) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/downpour_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) {
<< dest_table;
copy_dense_tables_.emplace_back(src_table, dest_table);
}
for (auto& m : copy_table_config_.table_denpendency_map()) {
for (auto& m : copy_table_config_.table_dependency_map()) {
if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
// currently only support one dependency
for (auto& value : m.values()) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/downpour_worker_opt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ void DownpourWorkerOpt::Initialize(const TrainerDesc& desc) {
<< dest_table;
copy_dense_tables_.emplace_back(src_table, dest_table);
}
for (auto& m : copy_table_config_.table_denpendency_map()) {
for (auto& m : copy_table_config_.table_dependency_map()) {
if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
// currently only support one dependency
for (auto& value : m.values()) {
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/fleet/heter_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ class HeterContext {
}
}
} else {
VLOG(3) << "Reset gpu task with dynamic mf dimention";
VLOG(3) << "Reset gpu task with dynamic mf dimension";
for (size_t i = 0; i < feature_dim_keys_.size(); i++) {
for (size_t j = 0; j < feature_dim_keys_[i].size(); j++) {
feature_dim_keys_[i][j].clear();
Expand Down Expand Up @@ -262,7 +262,7 @@ class HeterContext {
threads.push_back(std::thread(unique_dynamic_mf_func, i, j));
}
}
VLOG(3) << "heter_context unique keys with dynamic mf dimention";
VLOG(3) << "heter_context unique keys with dynamic mf dimension";
}
for (std::thread& t : threads) {
t.join();
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2752,7 +2752,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place,
VLOG(3) << "Begin GPUPS PushSparseGrad";

auto buf = memory::Alloc(place, total_length * grad_value_size);
VLOG(3) << "Push Sparse Max mf dimention: " << max_mf_dim_
VLOG(3) << "Push Sparse Max mf dimension: " << max_mf_dim_
<< "grad_value_size:" << grad_value_size;
float* total_grad_values_gpu = reinterpret_cast<float*>(buf->ptr());

Expand Down Expand Up @@ -2790,7 +2790,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place,
VLOG(3) << "Begin GPUPS PushSparseGrad";

auto buf = memory::Alloc(place, total_length * grad_value_size);
VLOG(3) << "Push Sparse Max mf dimention: " << max_mf_dim_
VLOG(3) << "Push Sparse Max mf dimension: " << max_mf_dim_
<< "grad_value_size:" << grad_value_size;
float* total_grad_values_gpu = reinterpret_cast<float*>(buf->ptr());
phi::DenseTensor& total_keys_tensor = keys_tensor[devid_2_index];
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/hetercpu_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ void HeterCpuWorker::Initialize(const TrainerDesc& desc) {
<< dest_table;
copy_dense_tables_.push_back(std::make_pair(src_table, dest_table));
}
for (auto& m : copy_table_config_.table_denpendency_map()) {
for (auto& m : copy_table_config_.table_dependency_map()) {
if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
// currently only support one dependency
for (auto& value : m.values()) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/hogwild_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,7 @@ void HogwildWorker::CreateThreadOperators(const ProgramDesc &program) {
// depend_builder.Build(ops_, start_index, sharding_mode_); hbm not safe
// should run in debug model need to fix
depend_builder.Build(ops_, start_index, false);
new_order = depend_builder.get_new_exexutor_order();
new_order = depend_builder.get_new_executor_order();
std::vector<std::unique_ptr<OperatorBase>> new_ops;
std::vector<size_t> final_order;
std::vector<std::string> new_op_names;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1805,7 +1805,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph,
auto* bv_tensor =
scope->FindVar(eltadd2_b->Name())->GetMutable<phi::DenseTensor>();

// NOTE(minghaoBD): to make it compatible with strucutured pruning on
// NOTE(minghaoBD): to make it compatible with structured pruning on
// num_head dimension:
// 1. get dim_head from reshape.shape[3], dim_embed from
// layer_norm_bias.shape[0]
Expand Down Expand Up @@ -1952,7 +1952,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph,
auto ffn1_in_scale = PADDLE_GET_CONST(
float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name));

// Calc outscale and Set them
// Calc out scale and Set them
auto qkv_weight_scale =
PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale"));
auto out_weight_scale =
Expand Down Expand Up @@ -2629,7 +2629,7 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion(
auto* qkv_b_tensor =
scope->FindVar(eltadd0_b->Name())->GetMutable<phi::DenseTensor>();

// NOTE(minghaoBD): to make it compatible with strucutured pruning on
// NOTE(minghaoBD): to make it compatible with structured pruning on
// num_head dimension:
// 1. get dim_head from reshape.shape[3], dim_embed from
// layer_norm_bias.shape[0]
Expand Down Expand Up @@ -2758,9 +2758,9 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion(
auto ffn1_in_scale = PADDLE_GET_CONST(
float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name));

// Calc outscale and Set them
// Calc out scale and Set them
// TODO(wufeisheng): Currently just match layer-wise weight scale, where
// channel-wise weight scale should also be surpported.
// channel-wise weight scale should also be supported.
auto qkv_weight_scale =
PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale"));
auto out_weight_scale =
Expand Down Expand Up @@ -4267,7 +4267,7 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion(
auto* qkv_b_tensor =
scope->FindVar(eltadd0_b->Name())->GetMutable<phi::DenseTensor>();

// NOTE(minghaoBD): to make it compatible with strucutured pruning on
// NOTE(minghaoBD): to make it compatible with structured pruning on
// num_head dimension:
// 1. get dim_head from reshape.shape[3], dim_embed from
// layer_norm_bias.shape[0]
Expand Down Expand Up @@ -4407,7 +4407,7 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion(
auto ffn1_in_scale = PADDLE_GET_CONST(
float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name));

// Calc outscale and Set them
// Calc out scale and Set them
auto qkv_weight_scale =
PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale"));
auto out_weight_scale =
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ PDNode* MultiHeadMatmulPattern::operator()() {
}

PDNode* MultiHeadMatmulV3Pattern::operator()() {
// Add mul op to support huggingface onnx model convertsion by x2paddle
// Add mul op to support huggingface onnx model conversion by x2paddle
std::unordered_set<std::string> matmul_ops{"mul", "matmul", "matmul_v2"};
auto* input0 = pattern->NewNode(input0_repr());
input0->assert_is_ops_input(matmul_ops);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace framework {
namespace ir {
namespace patterns {
/*
* \brief Fuse the subgraph representing multihead attention part of roformer
* \brief Fuse the subgraph representing multi-head attention part of roformer
* into multihead_matmul_roformer op.
*
* \note The following graph represents this equation:
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/xpu/fast_where_xpu_fuse_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ CascadeFastWhereXPUPattern::CascadeFastWhereXPUPattern(
pattern->NewNode(fast_where_xpu0_repr())->assert_is_op("fast_where_xpu");
auto fast_where_xpu1 =
pattern->NewNode(fast_where_xpu1_repr())->assert_is_op("fast_where_xpu");
// declare vairable nodes
// declare variable nodes
auto condition0 = pattern->NewNode(condition0_repr())
->assert_is_op_input("fast_where_xpu", "condition");
auto condition1 = pattern->NewNode(condition1_repr())
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/naive_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ void NaiveExecutor::ResetTrtOps(int num) {
#endif
}

void NaiveExecutor::CloneLiteEnigne(int num, void *stream) {
void NaiveExecutor::CloneLiteEngine(int num, void *stream) {
#ifdef PADDLE_WITH_LITE
for (auto &op : ops_) {
if (op->Type() == "lite_engine") {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/naive_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class NaiveExecutor {

void ResetTrtOps(int num);

void CloneLiteEnigne(int num, void* stream);
void CloneLiteEngine(int num, void* stream);

void RegisterOutputHook(const HookFunc& hookfunc);
void RegisterInputHook(const HookFunc& hookfunc);
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/new_executor/feed_fetch_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void SplitFeedTensors(const std::vector<std::string>& feed_names,
0,
phi::errors::InvalidArgument(
"Split expects feed data (%s)'s dim[0] (%d) is "
"diviable by micro_batch_num (%d).",
"divisible by micro_batch_num (%d).",
feed_names[i],
numel_size,
micro_batch_num));
Expand Down Expand Up @@ -211,7 +211,7 @@ void MergeTensors(const std::vector<const phi::DenseTensor*>& tensors,
tensor_dims[j],
new_dim[j],
phi::errors::InvalidArgument(
"DenseTensor.ddim[%d] should eaqual to %d, but is %d",
"DenseTensor.ddim[%d] should equal to %d, but is %d",
j,
new_dim[j],
tensor_dims[j]));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ void IfInstruction::Run() {
false_branch_inter_->Run({}, false);
CopyBranchOutput(false_branch_outputs_, false_branch_inter_);
}
// copy ouptut
// copy output
}

} // namespace framework
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void CustomKernelInstruction::BuildCustomContext(
input_ptrs_.emplace_back(nullptr);
custom_kernel_ctx_.EmplaceBackInput(std::move(paddle::Tensor()));
}
VLOG(8) << "ctx->EmplaceBackInput : an optioanl input " << t;
VLOG(8) << "ctx->EmplaceBackInput : an optional input " << t;
continue;
}
auto in_var_name = value_exec_info_.GetVarName(ptr);
Expand Down Expand Up @@ -285,7 +285,7 @@ void CustomKernelInstruction::BuildCustomContext(
cache_out_ptrs_.emplace_back(nullptr);
custom_kernel_ctx_.EmplaceBackOutput(std::move(paddle::Tensor()));

VLOG(8) << "ctx->EmplaceBackOutput : an optioanl output";
VLOG(8) << "ctx->EmplaceBackOutput : an optional output";
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ PADDLE_DEFINE_EXPORTED_bool(

// The difference between "sequential_run" and "serial_run":
// "sequential_run" dispatches OPs one by one according to the sequence in the
// Program, while "serial_run" ensures that all Ops are scheduled in a singal
// Program, while "serial_run" ensures that all Ops are scheduled in a single
// thread. In standalone executor, "sequential_run" is also "serial_run", while
// "serial_run" is not necessarily "sequential_run".
PADDLE_DEFINE_EXPORTED_bool(new_executor_sequential_run,
Expand Down Expand Up @@ -206,13 +206,13 @@ void DependencyBuilder::AddDependencyForCoalesceTensorOp() {
}

// find first op read 'outputs' between (first_read_fused_out_op, end)
// add depned: first_read_fused_out_op -> first op that reads 'outputs'
// add depend: first_read_fused_out_op -> first op that reads 'outputs'

// special case for consecutive communication ops, for example,
// FusedOutput = c_sync_calc_stream(FusedOutput)
// FusedOutput= c_allreduce_sum(FusedOutput)
// FusedOutput = c_sync_comm_stream(FusedOutput)
// we should take the last one to add depned instead of
// we should take the last one to add depend instead of
// 'first_read_fused_out_op'
size_t target = first_read_fused_out_op;
for (size_t j = first_read_fused_out_op + 1; j < op_num_; ++j) {
Expand Down Expand Up @@ -355,8 +355,8 @@ void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
std::set<size_t>& downstream_ops = (*op_downstream_map_)[prior_op_idx];
// NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
// ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
// a->c will not be shrinked in the following case: AddDownstreamOp(a, b) ->
// AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrinked by
// a->c will not be shrunk in the following case: AddDownstreamOp(a, b) ->
// AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrunk by
// ShrinkDownstreamMap.
for (size_t op_idx : downstream_ops) {
if (OpHappensBefore(op_idx, posterior_op_idx)) {
Expand Down Expand Up @@ -531,7 +531,7 @@ void DependencyBuilder::ShrinkDownstreamMap() {
}
}
// NOTE(Ruibiao): op_happens_before will not be changed when shrink
// dowstream map
// downstream map
(*op_downstream_map_)[i] = minumum_nexts;
}
VLOG(8) << "Finish shrink downstream map";
Expand Down Expand Up @@ -963,7 +963,7 @@ void DependencyBuilderSimplify::ShrinkDownstreamMap() {
}
}
// NOTE(Ruibiao): op_happens_before will not be changed when shrink
// dowstream map
// downstream map
op_downstream_map_.at(i) = minumum_nexts;
}
VLOG(8) << "Finish shrink downstream map";
Expand Down Expand Up @@ -1031,13 +1031,13 @@ void DependencyBuilderSimplify::AddDependencyForCoalesceTensorOp() {
}

// find first op read 'outputs' between (first_read_fused_out_op, end)
// add depned: first_read_fused_out_op -> first op that reads 'outputs'
// add depend: first_read_fused_out_op -> first op that reads 'outputs'

// special case for consecutive communication ops, for example,
// FusedOutput = c_sync_calc_stream(FusedOutput)
// FusedOutput= c_allreduce_sum(FusedOutput)
// FusedOutput = c_sync_comm_stream(FusedOutput)
// we should take the last one to add depned instead of
// we should take the last one to add depend instead of
// 'first_read_fused_out_op'
size_t target = first_read_fused_out_op;
for (size_t j = first_read_fused_out_op + 1; j < op_num_; ++j) {
Expand Down Expand Up @@ -1236,8 +1236,8 @@ void DependencyBuilderSimplify::SetSameStream() {
}
}

// get_new_exector_order by dfs
std::vector<size_t> DependencyBuilderSimplify::get_new_exexutor_order() {
// get_new_executor_order by dfs
std::vector<size_t> DependencyBuilderSimplify::get_new_executor_order() {
PADDLE_ENFORCE_EQ(
is_build_,
true,
Expand Down Expand Up @@ -1288,17 +1288,17 @@ std::vector<size_t> DependencyBuilderSimplify::get_new_exexutor_order() {
is_visit[op_idx] = true;
}

std::vector<size_t> dependecy_count(op_num_, 0);
std::vector<size_t> dependency_count(op_num_, 0);
for (auto it : op_downstream_map_) {
for (auto op_idx : it.second) {
dependecy_count[op_idx]++;
dependency_count[op_idx]++;
}
}
std::stack<size_t> s;
std::priority_queue<std::pair<size_t, size_t>> pq;

for (size_t op_idx = op_num_ - 1; op_idx >= start_index_; op_idx--) {
if (dependecy_count[op_idx] == 0) {
if (dependency_count[op_idx] == 0) {
pq.push(std::make_pair(op_behind_num[op_idx], op_idx));
}
}
Expand All @@ -1318,7 +1318,7 @@ std::vector<size_t> DependencyBuilderSimplify::get_new_exexutor_order() {
for (auto it = op_downstream_map_[current].rbegin();
it != op_downstream_map_[current].rend();
it++) {
if (--dependecy_count[*it] == 0 && !not_usefull_op.count(current)) {
if (--dependency_count[*it] == 0 && !not_usefull_op.count(current)) {
pq.push(std::make_pair(op_behind_num[*it], *it));
// s.push(*it);
}
Expand Down Expand Up @@ -1383,8 +1383,8 @@ void DependencyBuilderSimplify::AddDownstreamOp(size_t prior_op_idx,
std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
// NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
// ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
// a->c will not be shrinked in the following case: AddDownstreamOp(a, b) ->
// AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrinked by
// a->c will not be shrunk in the following case: AddDownstreamOp(a, b) ->
// AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrunk by
// ShrinkDownstreamMap.
for (size_t op_idx : downstream_ops) {
if (OpHappensBefore(op_idx, posterior_op_idx)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class InstructionBase;
namespace interpreter {

// DependencyBuilder provides some dependency adding function to handle the
// dependency that cannot be explicitly expresed by a Program. It is a
// dependency that cannot be explicitly expressed by a Program. It is a
// compromise of the incomplete expression ability of the Program. Do not add
// too many functions here at will, that will bring great burden to the
// Interpretercore.
Expand Down Expand Up @@ -146,7 +146,7 @@ class DependencyBuilderSimplify {
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
}
std::vector<size_t> get_new_exexutor_order();
std::vector<size_t> get_new_executor_order();

private:
void AddDependencyForCoalesceTensorOp();
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/new_executor/new_executor_defs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ void Instruction::AddInplace(Variable* in, Variable* out) {
void Instruction::ClearInplace() { vec_inplace_in_to_out_.clear(); }

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void Instruction::UpdataRecordStreamForGcInfo() {
void Instruction::UpdateRecordStreamForGcInfo() {
if (!IsInterpretercoreFastGCEnabled() ||
KernelType() != OpFuncType::kGpuAsync) {
return;
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/new_executor/new_executor_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ class Instruction {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool need_record_stream_for_gc_ = false;
gpuStream_t stream_{nullptr};
void UpdataRecordStreamForGcInfo();
void UpdateRecordStreamForGcInfo();
#endif

bool can_use_infermeta_ctx_ = false;
Expand Down
Loading