From 8cd1d67f16a3f6ccf18a9fe3b806271e0b6d8ef3 Mon Sep 17 00:00:00 2001 From: Thunderbrook Date: Thu, 19 Aug 2021 21:03:36 +0800 Subject: [PATCH 1/4] merge dense --- .../framework/fleet/heter_ps/optimizer.cuh.h | 4 +-- paddle/fluid/framework/ps_gpu_trainer.cc | 29 ++++++++++++++----- paddle/fluid/framework/trainer.h | 2 ++ .../pslib/optimizer_factory.py | 5 +++- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h b/paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h index 362877aa1604e0..374984ecdb6b6e 100644 --- a/paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h +++ b/paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h @@ -44,7 +44,7 @@ class Optimizer { if (w < optimizer_config::min_bound) w = optimizer_config::min_bound; if (w > optimizer_config::max_bound) w = optimizer_config::max_bound; - add_g2sum = scaled_grad * scaled_grad; + add_g2sum += scaled_grad * scaled_grad; g2sum += add_g2sum; } @@ -64,7 +64,7 @@ class Optimizer { w[i] = optimizer_config::mf_min_bound; if (w[i] > optimizer_config::mf_max_bound) w[i] = optimizer_config::mf_max_bound; - add_g2sum = scaled_grad * scaled_grad; + add_g2sum += scaled_grad * scaled_grad; } g2sum += add_g2sum / n; diff --git a/paddle/fluid/framework/ps_gpu_trainer.cc b/paddle/fluid/framework/ps_gpu_trainer.cc index 39bc3f040639bf..ff2064b1a2ce42 100644 --- a/paddle/fluid/framework/ps_gpu_trainer.cc +++ b/paddle/fluid/framework/ps_gpu_trainer.cc @@ -57,8 +57,6 @@ void PSGPUTrainer::Initialize(const TrainerDesc& trainer_desc, trainer_desc.downpour_param().stat_var_names(i)); } VLOG(3) << "going to initialize pull dense worker"; - pull_dense_worker_ = PullDenseWorker::GetInstance(); - pull_dense_worker_->Initialize(trainer_desc); SetDebug(trainer_desc.debug()); trainer_desc_ = trainer_desc; workers_.resize(place_num); @@ -112,15 +110,20 @@ void PSGPUTrainer::InitTrainerEnv(const ProgramDesc& main_program, } } } + for (auto& var : main_program.Block(0).AllVars()) { + if (var->Persistable()) { + auto it = std::find(need_merge_var_names_.begin(), need_merge_var_names_.end(), var->Name()); + if (it == need_merge_var_names_.end()) { + VLOG(1) << "train param: " << var->Name(); + trainable_param_.push_back(var->Name()); + } + } + } place_ = place; return; } void PSGPUTrainer::InitOtherEnv(const ProgramDesc& main_program) { - pull_dense_worker_->SetRootScope(root_scope_); - for (size_t i = 0; i < places_.size(); ++i) { - pull_dense_worker_->AddThreadScope(workers_[i]->GetThreadScope()); - } VLOG(3) << "init other env done."; } @@ -152,6 +155,18 @@ void PSGPUTrainer::MergeToRootScope(LoDTensor* root_tensor, LoDTensor* tensor) { TensorCopy(tmp_root, platform::CPUPlace(), root_tensor); } +void PSGPUTrainer::MergeDenseParam() { + auto thread_scope = workers_[0]->GetThreadScope(); + for (auto& name : trainable_param_) { + VLOG(2) << "merge var " << name << " to root scope"; + Variable* root_var = root_scope_->FindVar(name); + LoDTensor* root_tensor = root_var->GetMutable(); + Variable* var = thread_scope->FindVar(name); + LoDTensor* tensor = var->GetMutable(); + TensorCopy((*tensor), root_tensor->place(), root_tensor); + } +} + void PSGPUTrainer::Finalize() { for (auto& th : threads_) { th.join(); @@ -187,7 +202,7 @@ void PSGPUTrainer::Finalize() { _ForEachDataType_(MergeCallback); } } - pull_dense_worker_->MergeDenseParam(); + MergeDenseParam(); root_scope_->DropKids(); } } // namespace framework diff --git a/paddle/fluid/framework/trainer.h b/paddle/fluid/framework/trainer.h index fc8fb9327d5bb2..0f34c84549f2b9 100644 --- a/paddle/fluid/framework/trainer.h +++ b/paddle/fluid/framework/trainer.h @@ -265,6 +265,7 @@ class PSGPUTrainer : public TrainerBase { } virtual std::string GetDumpPath(int tid) { return ""; } virtual void InitDumpEnv() {} + virtual void MergeDenseParam(); template void MergeToRootScope(LoDTensor* root_tensor, LoDTensor* thread_tensor); @@ -274,6 +275,7 @@ class PSGPUTrainer : public TrainerBase { DownpourWorkerParameter param_; std::map> dense_grad_names_; std::vector need_merge_var_names_; + std::vector trainable_param_; float scale_datanorm_; paddle::platform::Place place_; ProgramDesc program_; diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py index 9a21a5a850db97..3b2f9065603756 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py @@ -412,10 +412,13 @@ def _minimize(self, sparse_table_index = 0 for num in range(len(losses)): loss = losses[num] + parameters = None + if parameter_list != None: + parameters = parameter_list[num] prog_id = str(id(loss.block.program)) # param_grads of program params_grads = sorted( - fluid.backward.append_backward(loss, parameter_list, + fluid.backward.append_backward(loss, parameters, no_grad_set), key=lambda x: x[0].name) From ab4b034f71803392907b3d0d690e14d7b808f4e5 Mon Sep 17 00:00:00 2001 From: Thunderbrook Date: Thu, 19 Aug 2021 21:21:31 +0800 Subject: [PATCH 2/4] log level --- paddle/fluid/framework/ps_gpu_trainer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/ps_gpu_trainer.cc b/paddle/fluid/framework/ps_gpu_trainer.cc index ff2064b1a2ce42..b1b91f2946d781 100644 --- a/paddle/fluid/framework/ps_gpu_trainer.cc +++ b/paddle/fluid/framework/ps_gpu_trainer.cc @@ -114,7 +114,7 @@ void PSGPUTrainer::InitTrainerEnv(const ProgramDesc& main_program, if (var->Persistable()) { auto it = std::find(need_merge_var_names_.begin(), need_merge_var_names_.end(), var->Name()); if (it == need_merge_var_names_.end()) { - VLOG(1) << "train param: " << var->Name(); + VLOG(2) << "train param: " << var->Name(); trainable_param_.push_back(var->Name()); } } From 820d0cfb39136d281ef52e8afe4ede1be6d17d99 Mon Sep 17 00:00:00 2001 From: Thunderbrook Date: Fri, 20 Aug 2021 11:02:50 +0800 Subject: [PATCH 3/4] tensor copy sync --- paddle/fluid/framework/ps_gpu_trainer.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/ps_gpu_trainer.cc b/paddle/fluid/framework/ps_gpu_trainer.cc index b1b91f2946d781..8b16b6a5d007ff 100644 --- a/paddle/fluid/framework/ps_gpu_trainer.cc +++ b/paddle/fluid/framework/ps_gpu_trainer.cc @@ -112,7 +112,8 @@ void PSGPUTrainer::InitTrainerEnv(const ProgramDesc& main_program, } for (auto& var : main_program.Block(0).AllVars()) { if (var->Persistable()) { - auto it = std::find(need_merge_var_names_.begin(), need_merge_var_names_.end(), var->Name()); + auto it = std::find(need_merge_var_names_.begin(), + need_merge_var_names_.end(), var->Name()); if (it == need_merge_var_names_.end()) { VLOG(2) << "train param: " << var->Name(); trainable_param_.push_back(var->Name()); @@ -144,15 +145,15 @@ Scope* PSGPUTrainer::GetWorkerScope(int thread_id) { return nullptr; } template void PSGPUTrainer::MergeToRootScope(LoDTensor* root_tensor, LoDTensor* tensor) { LoDTensor tmp_root; - TensorCopy(*root_tensor, platform::CPUPlace(), &tmp_root); + TensorCopySync(*root_tensor, platform::CPUPlace(), &tmp_root); T* tmp_root_data = tmp_root.data(); LoDTensor tmp_tensor; - TensorCopy(*tensor, platform::CPUPlace(), &tmp_tensor); + TensorCopySync(*tensor, platform::CPUPlace(), &tmp_tensor); T* data = tmp_tensor.data(); for (int i = 0; i < tmp_tensor.numel(); i++) { tmp_root_data[i] += data[i]; } - TensorCopy(tmp_root, platform::CPUPlace(), root_tensor); + TensorCopySync(tmp_root, platform::CPUPlace(), root_tensor); } void PSGPUTrainer::MergeDenseParam() { @@ -163,7 +164,7 @@ void PSGPUTrainer::MergeDenseParam() { LoDTensor* root_tensor = root_var->GetMutable(); Variable* var = thread_scope->FindVar(name); LoDTensor* tensor = var->GetMutable(); - TensorCopy((*tensor), root_tensor->place(), root_tensor); + TensorCopySync((*tensor), root_tensor->place(), root_tensor); } } From f77983ad8f7873e551ab8fb9b7d74a62e51a4aed Mon Sep 17 00:00:00 2001 From: Thunderbrook Date: Tue, 24 Aug 2021 10:28:48 +0800 Subject: [PATCH 4/4] format --- .../incubate/fleet/parameter_server/pslib/optimizer_factory.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py index 3b2f9065603756..e2fb29c5439e11 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py @@ -418,8 +418,7 @@ def _minimize(self, prog_id = str(id(loss.block.program)) # param_grads of program params_grads = sorted( - fluid.backward.append_backward(loss, parameters, - no_grad_set), + fluid.backward.append_backward(loss, parameters, no_grad_set), key=lambda x: x[0].name) flag_use_ps_gpu = strategy.get("use_ps_gpu", False)