Commit a5cd974

Merge commit, 2 parents: c8cd512 + e58c705
153 files changed: +9076, -2168 lines

CMakeLists.txt (4 additions, 8 deletions; several hunks below are trailing-whitespace removals, so their - and + lines render identically)

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License

-cmake_minimum_required(VERSION 3.15)
+cmake_minimum_required(VERSION 3.10)
 cmake_policy(VERSION 3.10)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
@@ -34,7 +34,7 @@ option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
 option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
 option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
 option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
-# NOTE(zhiqiu): WITH_ASCEND_CL can be compile on x86_64, so we can set WITH_ASCEND=OFF and WITH_ASCEND_CL=ON
+# NOTE(zhiqiu): WITH_ASCEND_CL can be compile on x86_64, so we can set WITH_ASCEND=OFF and WITH_ASCEND_CL=ON
 # to develop some acl related functionality on x86
 option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" ${WITH_ASCEND})
 option(WITH_ASCEND_CXX11 "Compile PaddlePaddle with ASCEND and CXX11 ABI" OFF)
@@ -65,7 +65,7 @@ if(WITH_MUSL)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy")
 endif()

-if(WITH_ASCEND AND NOT WITH_ASCEND_CXX11)
+if(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
 endif()

@@ -103,7 +103,7 @@ if(WIN32)
     endif()
   endforeach(flag_var)
 endif()
-
+
 # NOTE(Avin0323): Less parallel count result in faster compilation.
 math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
 # windows build turn off warnings, use parallel compiling.
@@ -182,7 +182,6 @@ option(WITH_PSLIB "Compile with pslib support" OFF)
 option(WITH_BOX_PS "Compile with box_ps support" OFF)
 option(WITH_XBYAK "Compile with xbyak support" ON)
 option(WITH_CONTRIB "Compile the third-party contributation" OFF)
-option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
 option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
 option(WITH_HETERPS "Compile with heterps" OFF})
 option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF)
@@ -259,9 +258,6 @@ endif()

 if(WITH_BRPC_RDMA)
   message(STATUS "Use brpc with rdma.")
-  if(WITH_GRPC)
-    message(FATAL_ERROR "Can't use grpc with brpc rdma.")
-  endif()
   if(NOT WITH_DISTRIBUTE)
     message(FATAL_ERROR "Can't use brpc rdma in no distribute env.")
   endif()
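
The ABI hunk above now keys off WITH_ASCEND_CL instead of WITH_ASCEND: unless WITH_ASCEND_CXX11 is set, the build adds -D_GLIBCXX_USE_CXX11_ABI=0, presumably so Paddle links cleanly against the Ascend toolkit's prebuilt binaries, which use the pre-C++11 libstdc++ ABI. A small probe (libstdc++/GCC specific; an illustrative sketch, not part of this commit) shows which ABI a translation unit was compiled with:

#include <cstdio>

// _GLIBCXX_USE_CXX11_ABI is defined by libstdc++ headers and can be
// overridden on the compiler command line, which is exactly what the
// CMake hunk above does for Ascend CL builds.
int main() {
#if defined(_GLIBCXX_USE_CXX11_ABI) && _GLIBCXX_USE_CXX11_ABI
  std::printf("new (C++11) libstdc++ ABI\n");
#else
  std::printf("old (pre-C++11) libstdc++ ABI\n");
#endif
  return 0;
}

Compiling this with and without -D_GLIBCXX_USE_CXX11_ABI=0 flips the output; mixing the two ABIs across a shared-library boundary causes link or runtime failures, which is what the flag guards against.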

cmake/configure.cmake (0 additions, 4 deletions)

@@ -177,10 +177,6 @@ if(WITH_HETERPS)
   add_definitions(-DPADDLE_WITH_HETERPS)
 endif()

-if(WITH_GRPC)
-  add_definitions(-DPADDLE_WITH_GRPC)
-endif(WITH_GRPC)
-
 if(WITH_BRPC_RDMA)
   add_definitions(-DPADDLE_WITH_BRPC_RDMA)
 endif(WITH_BRPC_RDMA)

cmake/external/ascend.cmake (6 additions, 2 deletions)

@@ -26,7 +26,8 @@ if(EXISTS ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include/graph/ascend_str
   add_definitions(-DPADDLE_WITH_ASCEND_STRING)
 endif()

-if(WITH_ASCEND)
+
+if(WITH_ASCEND OR WITH_ASCEND_CL)
   set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
   set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
   set(ASCEND_DRIVER_SHARE_DIR ${ASCEND_DIR}/driver/lib64/share)
@@ -49,7 +50,6 @@ if(WITH_ASCEND)
   INCLUDE_DIRECTORIES(${ATLAS_RUNTIME_INC_DIR})


-
   ADD_LIBRARY(ascend_ge SHARED IMPORTED GLOBAL)
   SET_PROPERTY(TARGET ascend_ge PROPERTY IMPORTED_LOCATION ${atlas_ge_runner_lib})

@@ -65,6 +65,7 @@ endif()
 if(WITH_ASCEND_CL)
   set(ASCEND_CL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64)

+  set(ascend_hccl_lib ${ASCEND_CL_DIR}/libhccl.so)
   set(ascendcl_lib ${ASCEND_CL_DIR}/libascendcl.so)
   set(acl_op_compiler_lib ${ASCEND_CL_DIR}/libacl_op_compiler.so)
   set(FWKACLLIB_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include)
@@ -78,6 +79,9 @@ if(WITH_ASCEND_CL)
   ADD_LIBRARY(ascendcl SHARED IMPORTED GLOBAL)
   SET_PROPERTY(TARGET ascendcl PROPERTY IMPORTED_LOCATION ${ascendcl_lib})

+  ADD_LIBRARY(ascend_hccl SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET ascend_hccl PROPERTY IMPORTED_LOCATION ${ascend_hccl_lib})
+
   ADD_LIBRARY(acl_op_compiler SHARED IMPORTED GLOBAL)
   SET_PROPERTY(TARGET acl_op_compiler PROPERTY IMPORTED_LOCATION ${acl_op_compiler_lib})
   add_custom_target(extern_ascend_cl DEPENDS ascendcl acl_op_compiler)

cmake/generic.cmake (15 additions, 4 deletions)

@@ -447,9 +447,20 @@ function(cc_test TARGET_NAME)
     cc_test_build(${TARGET_NAME}
       SRCS ${cc_test_SRCS}
       DEPS ${cc_test_DEPS})
-    cc_test_run(${TARGET_NAME}
-      COMMAND ${TARGET_NAME}
-      ARGS ${cc_test_ARGS})
+    # we don't run the hcom op tests, because they need a complex
+    # configuration with more than one machine
+    if(NOT ("${TARGET_NAME}" STREQUAL "c_broadcast_op_npu_test" OR
+            "${TARGET_NAME}" STREQUAL "c_allreduce_sum_op_npu_test" OR
+            "${TARGET_NAME}" STREQUAL "c_allreduce_max_op_npu_test" OR
+            "${TARGET_NAME}" STREQUAL "c_reducescatter_op_npu_test" OR
+            "${TARGET_NAME}" STREQUAL "c_allgather_op_npu_test" OR
+            "${TARGET_NAME}" STREQUAL "send_v2_op_npu_test" OR
+            "${TARGET_NAME}" STREQUAL "c_reduce_sum_op_npu_test" OR
+            "${TARGET_NAME}" STREQUAL "recv_v2_op_npu_test"))
+      cc_test_run(${TARGET_NAME}
+        COMMAND ${TARGET_NAME}
+        ARGS ${cc_test_ARGS})
+    endif()
   endif()
 endfunction(cc_test)

@@ -807,7 +818,7 @@ function(py_test TARGET_NAME)
       ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
       WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
   endif()
-
+
   if (WIN32)
     set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
   endif()

cmake/third_party.cmake (7 additions, 7 deletions; most hunks below are trailing-whitespace removals, so their - and + lines render identically)

@@ -29,9 +29,9 @@ set(third_party_deps)
 # 2. REPOSITORY: specify git REPOSITORY of 3rd party
 # 3. TAG: specify git tag/branch/commitID of 3rd party
 # 4. DIR: overwrite the original SOURCE_DIR when cache directory
-#
+#
 # The function Return 1 PARENT_SCOPE variables:
-#  - ${TARGET}_DOWNLOAD_CMD: Simply place "${TARGET}_DOWNLOAD_CMD" in ExternalProject_Add,
+#  - ${TARGET}_DOWNLOAD_CMD: Simply place "${TARGET}_DOWNLOAD_CMD" in ExternalProject_Add,
 #    and you no longer need to set any download steps in ExternalProject_Add.
 # For example:
 #    Cache_third_party(${TARGET}
@@ -52,7 +52,7 @@ FUNCTION(cache_third_party TARGET)
     SET(${TARGET_NAME}_DOWNLOAD_CMD
       GIT_REPOSITORY ${cache_third_party_REPOSITORY})
     IF(cache_third_party_TAG)
-      LIST(APPEND ${TARGET_NAME}_DOWNLOAD_CMD
+      LIST(APPEND ${TARGET_NAME}_DOWNLOAD_CMD
         GIT_TAG ${cache_third_party_TAG})
     ENDIF()
   ELSEIF(cache_third_party_URL)
@@ -130,7 +130,7 @@ ENDFUNCTION()
 # Correction of flags on different Platform(WIN/MAC) and Print Warning Message
 if (APPLE)
   if(WITH_MKL)
-    MESSAGE(WARNING
+    MESSAGE(WARNING
       "Mac is not supported with MKL in Paddle yet. Force WITH_MKL=OFF.")
     set(WITH_MKL OFF CACHE STRING "Disable MKL for building on mac" FORCE)
   endif()
@@ -141,7 +141,7 @@ if(WIN32 OR APPLE)
   SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE)

   if(WITH_LIBXSMM)
-    MESSAGE(WARNING
+    MESSAGE(WARNING
       "Windows, Mac are not supported with libxsmm in Paddle yet."
       "Force WITH_LIBXSMM=OFF")
     SET(WITH_LIBXSMM OFF CACHE STRING "Disable LIBXSMM in Windows and MacOS" FORCE)
@@ -276,7 +276,7 @@ endif(WITH_BOX_PS)

 if(WITH_ASCEND OR WITH_ASCEND_CL)
   include(external/ascend)
-  if(WITH_ASCEND)
+  if(WITH_ASCEND OR WITH_ASCEND_CL)
     list(APPEND third_party_deps extern_ascend)
   endif()
   if(WITH_ASCEND_CL)
@@ -290,7 +290,7 @@ if (WITH_PSCORE)

   include(external/leveldb)
   list(APPEND third_party_deps extern_leveldb)
-
+
   include(external/brpc)
   list(APPEND third_party_deps extern_brpc)

paddle/fluid/distributed/fleet.cc (38 additions, 0 deletions)

@@ -146,6 +146,44 @@ void FleetWrapper::CreateClient2ClientConnection() {
       client2client_max_retry_);
 }

+std::future<int32_t> FleetWrapper::PullSparseVarsAsync(
+    const Scope& scope, const uint64_t table_id,
+    const std::vector<std::string>& var_names, std::vector<uint64_t>* fea_keys,
+    std::vector<std::vector<float>>* fea_values, int fea_value_dim) {
+  fea_keys->clear();
+  fea_keys->resize(0);
+  fea_keys->reserve(MAX_FEASIGN_NUM);
+  for (auto name : var_names) {
+    Variable* var = scope.FindVar(name);
+    if (var == nullptr) {
+      continue;
+    }
+    LoDTensor* tensor = var->GetMutable<LoDTensor>();
+    CHECK(tensor != nullptr) << "tensor of var " << name << " is null";
+    int64_t* ids = tensor->data<int64_t>();
+    size_t len = tensor->numel();
+    for (auto i = 0u; i < len; ++i) {
+      if (ids[i] == 0u) {
+        continue;
+      }
+      fea_keys->push_back(static_cast<uint64_t>(ids[i]));
+    }
+  }
+  fea_values->resize(fea_keys->size() + 1);
+  for (auto& t : *fea_values) {
+    t.resize(fea_value_dim);
+  }
+  std::vector<float*> pull_result_ptr;
+  for (auto& t : *fea_values) {
+    pull_result_ptr.push_back(t.data());
+  }
+
+  bool training = true;
+  return pserver_ptr_->_worker_ptr->pull_sparse(pull_result_ptr.data(),
+                                                table_id, fea_keys->data(),
+                                                fea_keys->size(), training);
+}
+
 void FleetWrapper::PullSparseVarsSync(
     const Scope& scope, const uint64_t table_id,
     const std::vector<std::string>& var_names, std::vector<uint64_t>* fea_keys,
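
The new PullSparseVarsAsync gathers the non-zero feature ids from each variable's tensor, sizes the output buffers, and hands raw pointers to the pserver client's pull_sparse, returning its std::future instead of blocking the way PullSparseVarsSync does. A hypothetical caller-side sketch (the fleet/scope/table setup and variable names are assumptions for illustration, not from this commit):

// Hypothetical usage; assumes an initialized FleetWrapper `fleet` and a
// Scope whose named variables hold int64 feature-id tensors.
std::vector<uint64_t> fea_keys;
std::vector<std::vector<float>> fea_values;
auto status = fleet->PullSparseVarsAsync(
    scope, /*table_id=*/0, {"slot0_ids", "slot1_ids"},  // assumed names
    &fea_keys, &fea_values, /*fea_value_dim=*/8);

// ... overlap other work here while the pull RPC is in flight ...

// Block only once the embeddings are needed; non-zero signals failure.
if (status.get() != 0) {
  // handle the failed pull
}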

paddle/fluid/distributed/fleet.h (9 additions, 0 deletions)

@@ -84,6 +84,15 @@ class FleetWrapper {
                          int fea_dim,
                          const std::vector<std::string>& var_emb_names);

+  // Pull sparse variables from server in async mode
+  // Param<in>: scope, table_id, var_names, fea_keys, fea_dim
+  // Param<out>: fea_values; returns a std::future to wait on
+  std::future<int32_t> PullSparseVarsAsync(
+      const Scope& scope, const uint64_t table_id,
+      const std::vector<std::string>& var_names,
+      std::vector<uint64_t>* fea_keys,
+      std::vector<std::vector<float>>* fea_values, int fea_dim);
+
   // Pull sparse variables from server in sync mode
   // pull immediately to tensors
   // is_training is true means training, false means inference, the behavior is

paddle/fluid/distributed/service/graph_brpc_client.cc (2 additions, 1 deletion)

@@ -135,7 +135,8 @@ std::future<int32_t> GraphBrpcClient::get_node_feat(
     closure->request(request_idx)
         ->add_params(joint_feature_name.c_str(), joint_feature_name.size());

-    PsService_Stub rpc_stub(get_cmd_channel(server_index));
+    GraphPsService_Stub rpc_stub =
+        getServiceStub(get_cmd_channel(server_index));
     closure->cntl(request_idx)->set_log_id(butil::gettimeofday_ms());
     rpc_stub.service(closure->cntl(request_idx), closure->request(request_idx),
                      closure->response(request_idx), closure);
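
The bug was that get_node_feat built a generic PsService_Stub directly from the command channel; the fix obtains a GraphPsService_Stub through getServiceStub, so graph RPCs consistently go through the specialized stub. A minimal, self-contained sketch of that factory pattern, with hypothetical stand-in types (not Paddle's or brpc's actual classes):

#include <cstdio>

// Stand-ins for a channel and proto-generated stubs; illustrative only.
struct Channel { int server_index; };

struct PsServiceStub {
  explicit PsServiceStub(Channel* ch) : ch(ch) {}
  Channel* ch;
};

// The specialized stub is where graph-specific request handling lives.
struct GraphPsServiceStub : PsServiceStub {
  using PsServiceStub::PsServiceStub;
  void service() { std::printf("graph rpc -> server %d\n", ch->server_index); }
};

// Factory mirroring getServiceStub: every call site receives the
// specialized stub, so none can accidentally construct the generic one.
GraphPsServiceStub getServiceStub(Channel* ch) { return GraphPsServiceStub(ch); }

int main() {
  Channel ch{3};
  getServiceStub(&ch).service();
  return 0;
}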

paddle/fluid/distributed/table/common_sparse_table.cc (6 additions, 6 deletions)

@@ -126,17 +126,17 @@ void ProcessALine(const std::vector<std::string>& columns, const Meta& meta,
 int64_t SaveToText(std::ostream* os, std::shared_ptr<ValueBlock> block,
                    const int mode) {
   int64_t not_save_num = 0;
-  for (auto value : block->values_) {
-    if (mode == SaveMode::delta && !value.second->need_save_) {
+  for (auto& value : block->values_) {
+    if (mode == SaveMode::delta && !value.second.need_save_) {
       not_save_num++;
       continue;
     }

-    auto* vs = value.second->data_.data();
+    auto* vs = value.second.data_;
     std::stringstream ss;
     auto id = value.first;
-    ss << id << "\t" << value.second->count_ << "\t"
-       << value.second->unseen_days_ << "\t" << value.second->is_entry_ << "\t";
+    ss << id << "\t" << value.second.count_ << "\t" << value.second.unseen_days_
+       << "\t" << value.second.is_entry_ << "\t";

     for (int i = 0; i < block->value_length_; i++) {
       ss << vs[i];
@@ -148,7 +148,7 @@ int64_t SaveToText(std::ostream* os, std::shared_ptr<ValueBlock> block,
     os->write(ss.str().c_str(), sizeof(char) * ss.str().size());

     if (mode == SaveMode::base || mode == SaveMode::delta) {
-      value.second->need_save_ = false;
+      value.second.need_save_ = false;
     }
   }
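
Beyond dropping the shared_ptr indirection (value.second-> becomes value.second.), the loop-header change from auto to auto& matters for correctness: once the map stores values directly, iterating by value copies each entry, so the write to need_save_ at the bottom of the loop would land on a temporary and be lost. A self-contained illustration of that pitfall:

#include <cstdio>
#include <map>

int main() {
  std::map<int, bool> need_save{{1, true}, {2, true}};

  // Iterating by value copies each pair: the write hits the copy only.
  for (auto kv : need_save) kv.second = false;
  std::printf("after by-value loop:     %d\n", (int)need_save.at(1));  // 1

  // Iterating by reference mutates the map entry itself.
  for (auto& kv : need_save) kv.second = false;
  std::printf("after by-reference loop: %d\n", (int)need_save.at(1));  // 0
  return 0;
}

With the previous shared_ptr storage the by-value copy still aliased the shared object, which is why the old loop worked; storing values in place makes the reference mandatory (and also avoids copying each entry during save).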
