Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions paddle/common/enforce.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ class LogSimplyStrRegistrar {
} // namespace enforce
} // namespace common

// TODO(zhangbopd): This is a copy from pir, and shoud be removed after merge
// this into common enfoce namespace above.
// TODO(zhangbopd): This is a copy from pir, and should be removed after merge
// this into common enforce namespace above.
template <typename T>
inline bool is_error(const T& stat) {
return !stat;
Expand Down
16 changes: 8 additions & 8 deletions paddle/fluid/distributed/auto_parallel/spmd_rules/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ std::unordered_map<std::string, int64_t> ShardingMergeForTensors(
}
}

// Resolute "mesh_dim shard by more than one axis" confict.
// Resolute "mesh_dim shard by more than one axis" conflict.
// Now we just naive pick the first axis naively.
// (TODO) use local cost model to pick the axis with lowest cost(in concern of
// memory or communication or computation).
Expand All @@ -107,7 +107,7 @@ std::unordered_map<std::string, int64_t> ShardingMergeForTensors(
return axis_to_dim_map;
}

// Rule1: A repicated dimension could be merged by any sharded dimension.
// Rule1: A replicated dimension could be merged by any sharded dimension.
// Rule2: A tensor axis could at most be sharded by one mesh dimension.
// (TODO trigger heuristics cost model and reshard to handle axis sharded by
// multiple dimension case.)
Expand Down Expand Up @@ -159,7 +159,7 @@ std::vector<int64_t> ResoluteOutputPartialDimension(
return partial_on_dims;
}

std::string GetBroadcastAxes(const int64_t& tenosr_ndim,
std::string GetBroadcastAxes(const int64_t& tensor_ndim,
const int64_t& broadcast_ndim,
const std::string& alphabet) {
PADDLE_ENFORCE_GE(
Expand All @@ -170,15 +170,15 @@ std::string GetBroadcastAxes(const int64_t& tenosr_ndim,
alphabet.size(),
broadcast_ndim));
PADDLE_ENFORCE_GE(broadcast_ndim,
tenosr_ndim,
tensor_ndim,
phi::errors::InvalidArgument(
"broadcast ndim [%d] is less than tenosr ndim [%d]",
"broadcast ndim [%d] is less than tensor ndim [%d]",
broadcast_ndim,
tenosr_ndim));
if (tenosr_ndim <= 0) {
tensor_ndim));
if (tensor_ndim <= 0) {
return std::string();
}
return alphabet.substr(broadcast_ndim - tenosr_ndim, tenosr_ndim);
return alphabet.substr(broadcast_ndim - tensor_ndim, tensor_ndim);
}

TensorDistAttr ReplicatedOnMesh(const TensorDistAttr& src_dist_attr) {
Expand Down
22 changes: 11 additions & 11 deletions paddle/fluid/distributed/auto_parallel/spmd_rules/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ class SPMDRuleBase {
// 1. Merge the Sharding (dims_mapping) among Input Tensors.
// 2. Infer the Sharding (dims_mapping) for Output Tensors.
// The Info of input tensors (Shape and DistAttr) are wrapped as
// DistTensorSpec, and op attribtue should be given as AttributeMap. The
// DistTensorSpec, and op attribute should be given as AttributeMap. The
// Output is a pair consist of two vectors:
// 1. The first vector: the merged DistAttr of input tensors.
// 2. The infered DistAttr of output tensors.
// 2. The inferred DistAttr of output tensors.
// The Merged DistAttr might be different from the original Intput DistAttrs,
// which means that the corressponding input tensor need to be reshard.
// which means that the corresponding input tensor need to be reshard.
virtual std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
InferForward(const std::vector<DistTensorSpec>& input_specs,
const paddle::framework::AttributeMap& attrs);
Expand All @@ -55,10 +55,10 @@ class SPMDRuleBase {
// 1. Merge the Sharding (dims_mapping) among Output Tensors.
// 2. Infer the Sharding (dims_mapping) for Input Tensors.
// The Info of output tensors (Shape and DistAttr) are wrapped as
// DistTensorSpec, and op attribtue should be given as AttributeMap. The
// DistTensorSpec, and op attribute should be given as AttributeMap. The
// Output is a pair consist of two vectors:
// 1. The first vector: the merged DistAttr of output tensors.
// 2. The infered DistAttr of Input tensors.
// 2. The inferred DistAttr of Input tensors.
virtual std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
InferBackward(const std::vector<DistTensorSpec>& input_specs,
const std::vector<DistTensorSpec>& output_specs,
Expand Down Expand Up @@ -96,7 +96,7 @@ std::unordered_map<std::string, int64_t> ShardingMergeForTensors(
const bool merge_conflicts = true);

// Merge the sharding specification (dims mapping) for one tensor Axis.
// Rule1: A repicated dimension could be merged by any sharded dimension.
// Rule1: A replicated dimension could be merged by any sharded dimension.
// Rule2: A tensor axis could at most be sharded by one mesh dimension.
// (TODO trigger heuristics cost model and reshard to handle axis sharded by
// multiple dimension case.)
Expand All @@ -111,18 +111,18 @@ int64_t ShardingMergeForAxis(const std::string& axis,
TensorDistAttr CopyTensorDistAttrForOutput(const TensorDistAttr& src_dist_attr);

// Resolute the partial mesh dimension of a output tensor, giving the
// merged sharding specifcation of input tensors and the axis names of output
// merged sharding specification of input tensors and the axis names of output
// tensor. Input are
std::vector<int64_t> ResoluteOutputPartialDimension(
const std::unordered_map<std::string, int64_t>& axis_to_dim_map,
const std::string& tensor_axes);

// Generate the axis notation of tensor for the einsum notation of a broadcast
// operation(alignment star from the rightmost axis). tenosr_ndim: the size of
// the tensor. broadcast_ndim: the maxium size of tensors in this broadcast
// operation(alignment star from the rightmost axis). tensor_ndim: the size of
// the tensor. broadcast_ndim: the maximum size of tensors in this broadcast
// operation. alphabet: the characters used to represent the axes of tensor.
// length of alphabet should >= broadcast_ndim.
std::string GetBroadcastAxes(const int64_t& tenosr_ndim,
std::string GetBroadcastAxes(const int64_t& tensor_ndim,
const int64_t& broadcast_ndim,
const std::string& alphabet);

Expand All @@ -147,7 +147,7 @@ GetAxesDimsMappingPair(const std::vector<std::string>& tensor_axes,
// the annotated axes after inferring forward or backward. The parameter axis
// stores the axes of the tensor. "1" is a special axis, for the axis "1", set
// its dims mapping to -1.
// if unsharded_miss_axis, "-1" is assigend to axes that has no key in
// if unsharded_miss_axis, "-1" is assigned to axes that has no key in
// axis_to_dim_map.
std::vector<int64_t> GetDimsMappingForAxes(
const std::string& axes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ ReplicatedSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
}

// TODO(ljz): we need to know num of output and size of each output before
// generate the excat replicasted dist tensor attr for the current op.
// generate the excat replicated dist tensor attr for the current op.
// here we just assume that only one output tensor and has the same size as
// the first input tensor.
return {intput_dist_attrs, {ReplicatedOnMesh(input_specs[0].dist_attr())}};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace distributed {

// NOTE: Notice that some backends use `stream` as an abstract conception of
// hardward resource. We provide this base class allowing users to put
// communications on calculation stream. In some scenorios, we found this will
// communications on calculation stream. In some scenarios, we found this will
// save the time of switching streams.
class ProcessGroupWithStream : public ProcessGroup {
public:
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/collective/reducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,7 @@ void EagerReducer::MarkVarReady(const size_t var_index,
grad_tensor.is_selected_rows(),
true,
platform::errors::PreconditionNotMet(
"The sparse parameter[%d][%s] must have a selectedrows gradient. "
"The sparse parameter[%d][%s] must have a selected rows gradient. "
"Before forward pass, the parameter type is inferred to be "
"SelectedRows, but after backward pass, its actual type becomes "
"LodTensor. It is currently not supported by DataParallel. "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void ComputeInterceptor::DecodeMsgVars(const InterceptorMessage& msg) {
microbatch_scopes_.size(),
platform::errors::InvalidArgument(
"Step out of range. There are %ld "
"microbatch_scopes, but recevice scope index %ld",
"microbatch_scopes, but receive scope index %ld",
microbatch_scopes_.size(),
scope_id));
auto* scope = microbatch_scopes_[scope_id];
Expand All @@ -76,7 +76,7 @@ InterceptorMessage ComputeInterceptor::PrepareVarsMsg() {
microbatch_scopes_.size(),
platform::errors::InvalidArgument(
"Step out of range. There are %ld "
"microbatch_scopes, but recevice scope index %ld",
"microbatch_scopes, but receive scope index %ld",
microbatch_scopes_.size(),
cur_scope_id_));
auto* scope = microbatch_scopes_[cur_scope_id_];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ bool CondInterceptor::GetCondResult() {
microbatch_scopes_.size(),
platform::errors::InvalidArgument(
"Step out of range. There are %ld "
"microbatch_scopes, but recevice scope index %ld",
"microbatch_scopes, but receive scope index %ld",
microbatch_scopes_.size(),
cur_scope_id_));
auto* cond_var =
Expand Down
10 changes: 5 additions & 5 deletions paddle/fluid/distributed/ps/service/brpc_ps_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ int32_t BrpcPsClient::InitializeFlWorker(const std::string &self_endpoint) {
coordinator_ip_port.assign(coordinator_list[i].ip.c_str());
coordinator_ip_port.append(":");
coordinator_ip_port.append(std::to_string(coordinator_list[i].port));
VLOG(0) << "fl-ps > BrpcFlclient connetcting to coordinator: "
VLOG(0) << "fl-ps > BrpcFlclient connecting to coordinator: "
<< coordinator_ip_port;
for (size_t j = 0; j < _coordinator_channels[i].size(); ++j) {
_coordinator_channels[i][j].reset(new brpc::Channel());
Expand Down Expand Up @@ -383,7 +383,7 @@ int32_t BrpcPsClient::Initialize() {

int DownpourBrpcClosure::check_response(size_t request_idx, int cmd_id) {
if (_cntls[request_idx]->Failed()) {
LOG(ERROR) << "resquest cmd_id:" << cmd_id
LOG(ERROR) << "request cmd_id:" << cmd_id
<< " failed, "
"err:"
<< _cntls[request_idx]->ErrorText();
Expand Down Expand Up @@ -1622,7 +1622,7 @@ void BrpcPsClient::PushSparseTaskConsume() {
auto sparse_task_data = _sparse_task_pool.get();

task_list.clear();
int cur_meger_size = task_queue->Size();
int cur_merge_size = task_queue->Size();

// task_list[0] 为一个空SparseAsyncTask, 分shard异步merge结果存入此结构。
sparse_task_data->shared_data.resize(request_call_num);
Expand All @@ -1632,12 +1632,12 @@ void BrpcPsClient::PushSparseTaskConsume() {
auto async_task =
new SparseAsyncTask(sparse_task_data, table_id, push_timer);

task_list.reserve(cur_meger_size + 1);
task_list.reserve(cur_merge_size + 1);

task_list.push_back(
std::move(std::shared_ptr<SparseAsyncTask>(async_task)));

while (!task_queue->Empty() && merge_count < cur_meger_size) {
while (!task_queue->Empty() && merge_count < cur_merge_size) {
++merge_count;
SparseAsyncTask *task = nullptr;
task_queue->Get(task);
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/ps/service/graph_brpc_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ int32_t GraphBrpcService::LoadOneTable(Table *table,
set_response_code(
response,
-1,
"PsRequestMessage.datas is requeired at least 2 for path & load_param");
"PsRequestMessage.datas is required at least 2 for path & load_param");
return -1;
}
if (table->Load(request.params(0), request.params(1)) != 0) {
Expand Down