Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions paddle/fluid/framework/device_worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ class DeviceWorker {
FetchConfig fetch_config_;
bool use_cvm_;
bool no_cvm_;
bool scale_sparse_grad_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider naming this scale_sparse_gradient_with_batch_size.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have changed scale_sparse_grad to scale_sparse_gradient_with_batch_size.

TrainerDesc trainer_desc_;

// dump params or grads for debug
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/framework/downpour_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) {
use_cvm_ = desc.use_cvm();
// for sparse value accessor, embedding only
no_cvm_ = desc.no_cvm();
scale_sparse_grad_ = desc.scale_sparse_grad();
scale_datanorm_ = desc.scale_datanorm();
dump_slot_ = desc.dump_slot();
adjust_ins_weight_config_ = desc.adjust_ins_weight_config();
Expand Down Expand Up @@ -591,7 +592,7 @@ void DownpourWorker::TrainFilesWithProfiler() {
*thread_scope_, tid, features_[tid], feature_labels_[tid],
sparse_key_names_[tid], sparse_grad_names_[tid], table.emb_dim(),
&feature_grads_[tid], &push_sparse_status_, cur_batch, use_cvm_,
dump_slot_, &sparse_push_keys_[tid], no_cvm_);
dump_slot_, &sparse_push_keys_[tid], no_cvm_, scale_sparse_grad_);
timeline.Pause();
push_sparse_time += timeline.ElapsedSec();
total_time += timeline.ElapsedSec();
Expand Down Expand Up @@ -866,7 +867,7 @@ void DownpourWorker::TrainFiles() {
*thread_scope_, tid, features_[tid], feature_labels_[tid],
sparse_key_names_[tid], sparse_grad_names_[tid], table.emb_dim(),
&feature_grads_[tid], &push_sparse_status_, cur_batch, use_cvm_,
dump_slot_, &sparse_push_keys_[tid], no_cvm_);
dump_slot_, &sparse_push_keys_[tid], no_cvm_, scale_sparse_grad_);
}
}

Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/framework/downpour_worker_opt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,12 @@ void DownpourWorkerOpt::TrainFiles() {
break;
}
}
bool scale_sparse_grad_ = true;
fleet_ptr_->PushSparseVarsWithLabelAsync(
*thread_scope_, tid, features_[tid], feature_labels_[tid],
sparse_key_names_[tid], sparse_grad_names_[tid], table.emb_dim(),
&feature_grads_[tid], &push_sparse_status_, cur_batch, use_cvm_,
dump_slot_, &sparse_push_keys_[tid], no_cvm_);
dump_slot_, &sparse_push_keys_[tid], no_cvm_, scale_sparse_grad_);
}
}

Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/framework/fleet/fleet_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -870,7 +870,8 @@ void FleetWrapper::PushSparseVarsWithLabelAsync(
std::vector<std::vector<float>>* push_values,
std::vector<::std::future<int32_t>>* push_sparse_status,
const int batch_size, const bool use_cvm, const bool dump_slot,
std::vector<uint64_t>* sparse_push_keys, const bool no_cvm) {
std::vector<uint64_t>* sparse_push_keys, const bool no_cvm,
const bool scale_sparse_grad) {
#ifdef PADDLE_WITH_PSLIB
int offset = 2;
int slot_offset = 0;
Expand Down Expand Up @@ -939,7 +940,7 @@ void FleetWrapper::PushSparseVarsWithLabelAsync(
}
float* g = g_tensor->data<float>();

if (scale_sparse_gradient_with_batch_size_ && grad_dim > 0) {
if (scale_sparse_grad && grad_dim > 0) {
int dim = emb_dim;
Eigen::Map<
Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/framework/fleet/fleet_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ class FleetWrapper {
std::vector<std::vector<float>>* push_values,
std::vector<::std::future<int32_t>>* push_sparse_status,
const int batch_size, const bool use_cvm, const bool dump_slot,
std::vector<uint64_t>* sparse_push_keys, const bool no_cvm);
std::vector<uint64_t>* sparse_push_keys, const bool no_cvm,
const bool scale_sparse_grad);

// Push sparse variables to server in async mode
void PushSparseFromTensorWithLabelAsync(
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/framework/trainer_desc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ message TrainerDesc {

optional bool use_ps_gpu = 32 [ default = false ];
optional string user_define_dump_filename = 33;
optional bool scale_sparse_grad = 34 [ default = true ];

// device worker parameters
optional HogwildWorkerParameter hogwild_param = 101;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,7 @@ def _minimize(self,
opt_info["worker_skipped_ops"] = worker_skipped_ops
opt_info["use_cvm"] = strategy.get("use_cvm", False)
opt_info["no_cvm"] = strategy.get("no_cvm", False)
opt_info["scale_sparse_grad"] = strategy.get("scale_sparse_grad", True)
opt_info["worker_class"] = strategy.get("worker_class",
"DownpourWorker")
opt_info["stat_var_names"] = strategy.get("stat_var_names", [])
Expand Down
3 changes: 3 additions & 0 deletions python/paddle/fluid/trainer_desc.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ def _set_use_cvm(self, use_cvm=False):
# Store the `no_cvm` flag on this trainer's proto description.
# `no_cvm` defaults to False when the caller does not pass a value.
def _set_no_cvm(self, no_cvm=False):
self.proto_desc.no_cvm = no_cvm

# Store the `scale_sparse_grad` flag on this trainer's proto description.
# `scale_sparse_grad` defaults to True when the caller does not pass a value.
# NOTE(review): presumably controls whether sparse gradients are scaled
# before being pushed -- confirm against the consumer of the proto field.
def _set_scale_sparse_grad(self, scale_sparse_grad=True):
self.proto_desc.scale_sparse_grad = scale_sparse_grad

# Store the `scale_datanorm` value on this trainer's proto description.
# `scale_datanorm` defaults to -1 when the caller does not pass a value.
# NOTE(review): -1 looks like an "unset/disabled" sentinel -- confirm
# against the code that reads the proto field.
def _set_scale_datanorm(self, scale_datanorm=-1):
self.proto_desc.scale_datanorm = scale_datanorm

Expand Down
3 changes: 3 additions & 0 deletions python/paddle/fluid/trainer_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ def _create_trainer(self, opt_info=None):
trainer._set_use_cvm(opt_info["use_cvm"])
if opt_info.get("no_cvm") is not None:
trainer._set_no_cvm(opt_info["no_cvm"])
if opt_info.get("scale_sparse_grad") is not None:
trainer._set_scale_sparse_grad(opt_info[
"scale_sparse_grad"])
if opt_info.get("scale_datanorm") is not None:
trainer._set_scale_datanorm(opt_info["scale_datanorm"])
if opt_info.get("adjust_ins_weight") is not None:
Expand Down