Skip to content

Commit a7114e2

Browse files
author
0x45f
committed
Merge remote-tracking branch 'upstream/develop' into gast
2 parents d6721a0 + 0989211 commit a7114e2

File tree

555 files changed

+20758
-5358
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

555 files changed

+20758
-5358
lines changed

CMakeLists.txt

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -119,17 +119,19 @@ if(WIN32)
119119
endforeach(flag_var)
120120
endif()
121121

122-
math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
123-
124122
# Windows build: turn off warnings and use parallel compiling.
125123
foreach(flag_var
126124
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
127125
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
128126
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
129127
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
130128
string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}")
131-
# NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling
132-
if(NOT WITH_GPU)
129+
130+
# NOTE(zhouwei25): GPU compilation has too high memory utilization when compiling in parallel,
131+
# so /MP is only added for Visual Studio generators (CMAKE_GENERATOR is e.g. "Visual Studio 16 2019", hence MATCHES) without GPU.
132+
# For other generators like Ninja, there is no need to add /MP.
133+
if("${CMAKE_GENERATOR}" MATCHES "Visual Studio" AND NOT WITH_GPU)
134+
math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
133135
set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}")
134136
endif()
135137
endforeach(flag_var)
@@ -312,6 +314,17 @@ else()
312314
endif()
313315
endif()
314316

317+
if(WITH_DISTRIBUTE)
318+
if(LINUX)
319+
set(WITH_GLOO ON CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
320+
endif()
321+
if(WITH_ASCEND_CL)
322+
# disable WITH_PSCORE for NPU before include third_party
323+
MESSAGE(WARNING "Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF.")
324+
set(WITH_PSCORE OFF CACHE BOOL "Disable WITH_PSCORE when compiling with NPU" FORCE)
325+
endif()
326+
endif()
327+
315328
include(third_party) # download, build, install third_party, Contains about 20+ dependencies
316329

317330
include(flags) # set paddle compile flags
@@ -322,12 +335,6 @@ if(WITH_PROFILER)
322335
add_definitions(-DWITH_GPERFTOOLS)
323336
endif()
324337

325-
if(WITH_DISTRIBUTE)
326-
if(LINUX)
327-
set(WITH_GLOO ON CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
328-
endif()
329-
endif()
330-
331338
include(ccache) # set ccache for compilation
332339
include(util) # set unittest and link libs
333340
include(version) # set PADDLE_VERSION

cmake/ccache.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ elseif("${CMAKE_GENERATOR}" STREQUAL "Ninja")
1818

1919
if(SCCACHE_PATH)
2020
execute_process(COMMAND sccache -V OUTPUT_VARIABLE sccache_version)
21-
message(STATUS "${sccache_version} is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.")
21+
message(STATUS "sccache is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.")
2222

2323
set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_PATH})
2424
set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_PATH})

cmake/external/xpu.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ ELSE ()
3535
ENDIF()
3636

3737
SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
38-
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701")
38+
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210729")
3939
SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
4040
SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
4141
SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)

cmake/generic.cmake

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -932,12 +932,8 @@ function(generate_dummy_static_lib)
932932
if(NOT dummy_GENERATOR)
933933
message(FATAL_ERROR "You must provide a generator file name.")
934934
endif()
935-
# if ${dummy_GENERATOR} contains "/", it may be a file path
936-
if(NOT ${dummy_GENERATOR} MATCHES ".*/.*")
937-
set(dummy_GENERATOR "${CMAKE_CURRENT_LIST_DIR}/${dummy_GENERATOR}")
938-
endif()
939935
if(NOT dummy_CONTENT)
940-
set(dummy_CONTENT "${dummy_FILE_PATH} for lib ${dummy_LIB_NAME}")
936+
set(dummy_CONTENT "${dummy_LIB_NAME}_dummy.c for lib ${dummy_LIB_NAME}")
941937
endif()
942938

943939
configure_file(${PROJECT_SOURCE_DIR}/cmake/dummy.c.in ${dummy_FILE_PATH} @ONLY)

cmake/unity_build.cmake

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,14 @@ function(compose_unity_target_sources TARGET TYPE)
7777
get_property(unity_group_index_max GLOBAL PROPERTY ${TARGET}_${TYPE}_group_index)
7878
foreach(src ${ARGN})
7979
set(unity_file "")
80-
# UB use absolute path of source.
80+
# Note(zhouwei25): UB uses the path relative to CMAKE_SOURCE_DIR.
81+
# If an absolute path is used, the sccache/ccache hit rate will be reduced.
8182
if(IS_ABSOLUTE ${src})
8283
set(src_absolute_path ${src})
84+
file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src})
8385
else()
8486
set(src_absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${src})
87+
file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src_absolute_path})
8588
endif()
8689
# If `unity_group_index_max` is empty, there is no combination
8790
# relationship.
@@ -106,7 +109,7 @@ function(compose_unity_target_sources TARGET TYPE)
106109
set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} ${UNITY_CU_BEFORE_CODE})
107110
endif()
108111
endif()
109-
set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_absolute_path}\"")
112+
set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_relative_path}\"")
110113
set(unity_target_sources ${unity_target_sources} ${unity_file})
111114
break()
112115
endif()

paddle/fluid/distributed/service/communicator.h

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -68,31 +68,62 @@ class BlockingQueue {
6868
}
6969

7070
bool Push(const T &elem) {
71-
{
72-
std::unique_lock<std::mutex> lock(mutex_);
73-
cv_.wait(lock, [&] { return queue_.size() < capacity_; });
74-
queue_.push_back(elem);
71+
std::unique_lock<std::mutex> lock(mutex_);
72+
WaitForWrite(lock);
73+
74+
queue_.push_back(elem);
75+
76+
Notify();
77+
return true;
78+
}
79+
bool WaitForWrite(std::unique_lock<std::mutex> &lock) { // NOLINT
80+
while (FullUnlocked()) {
81+
if (empty_waiters_ != 0) {
82+
empty_cond_.notify_one();
83+
}
84+
full_waiters_++;
85+
full_cond_.wait(lock);
86+
full_waiters_--;
7587
}
76-
cv_.notify_one();
7788
return true;
7889
}
79-
80-
bool Push(T &&elem) {
81-
{
82-
std::unique_lock<std::mutex> lock(mutex_);
83-
cv_.wait(lock, [&] { return queue_.size() < capacity_; });
84-
queue_.emplace_back(std::move(elem));
90+
bool WaitForRead(std::unique_lock<std::mutex> &lock) { // NOLINT
91+
while (EmptyUnlocked()) {
92+
if (full_waiters_ != 0) {
93+
full_cond_.notify_one();
94+
}
95+
empty_waiters_++;
96+
empty_cond_.wait(lock);
97+
empty_waiters_--;
8598
}
86-
cv_.notify_one();
8799
return true;
88100
}
101+
bool EmptyUnlocked() { return queue_.empty(); }
102+
103+
bool FullUnlocked() { return queue_.size() >= capacity_; }
104+
void Notify() {
105+
if (empty_waiters_ != 0 && (!EmptyUnlocked())) {
106+
empty_cond_.notify_one();
107+
}
108+
if (full_waiters_ != 0 && (!FullUnlocked())) {
109+
full_cond_.notify_one();
110+
}
111+
}
112+
113+
bool Push(T &&elem) {
114+
std::unique_lock<std::mutex> lock(mutex_);
115+
WaitForWrite(lock);
116+
queue_.emplace_back(std::move(elem));
89117

118+
Notify();
119+
return true;
120+
}
90121
T Pop() {
91122
std::unique_lock<std::mutex> lock(mutex_);
92-
cv_.wait(lock, [=] { return !queue_.empty(); });
123+
WaitForRead(lock);
93124
T rc(std::move(queue_.front()));
94125
queue_.pop_front();
95-
cv_.notify_one();
126+
Notify();
96127
return rc;
97128
}
98129

@@ -107,11 +138,14 @@ class BlockingQueue {
107138
}
108139

109140
private:
141+
int empty_waiters_ = 0;
142+
int full_waiters_ = 0;
143+
std::condition_variable empty_cond_;
144+
std::condition_variable full_cond_;
110145
const size_t capacity_;
111146
std::deque<T> queue_;
112147

113148
mutable std::mutex mutex_;
114-
std::condition_variable cv_;
115149
};
116150

117151
template <typename T, int MajorType = Eigen::RowMajor,

paddle/fluid/framework/CMakeLists.txt

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,23 +188,28 @@ cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place)
188188

189189
cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference)
190190

191+
IF(WITH_XPU)
192+
cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
193+
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils)
194+
ELSE()
191195
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
192196
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils)
197+
ENDIF()
193198

194199
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
195200
cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context)
196201

197202
cc_library(version SRCS version.cc)
198203
cc_test(version_test SRCS version_test.cc DEPS version)
199204

200-
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
205+
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute shape_inference op_info operator glog version)
201206

202207
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
203208

204209
cc_library(op_call_stack SRCS op_call_stack.cc DEPS op_proto_maker enforce)
205210
cc_test(op_call_stack_test SRCS op_call_stack_test.cc DEPS op_call_stack)
206211

207-
cc_library(program_processing SRCS program_processing.cc DEPS framework_proto)
212+
cc_library(program_processing SRCS program_processing.cc DEPS boost proto_desc)
208213
cc_test(program_processing_test SRCS program_processing_test.cc DEPS proto_desc program_processing)
209214

210215
if(WITH_GPU)
@@ -405,7 +410,7 @@ configure_file(commit.h.in commit.h)
405410
# Adapt to custom op mechanism: Include the header files related to the data type
406411
# to avoid exposing the path of the underlying file
407412
include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
408-
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../extension/include)
413+
include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include)
409414

410415
if(WITH_ROCM)
411416
hip_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,14 @@ set(IR_PASS_DEPS graph_viz_pass multi_devices_graph_pass
134134
modify_op_lock_and_record_event_pass
135135
coalesce_grad_tensor_pass fuse_all_reduce_op_pass backward_optimizer_op_deps_pass
136136
fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass
137-
sync_batch_norm_pass runtime_context_cache_pass)
137+
sync_batch_norm_pass runtime_context_cache_pass graph_to_program_pass
138+
fix_op_run_order_pass)
138139
if(NOT APPLE AND NOT WIN32 AND (WITH_GPU OR WITH_ROCM))
139140
set(IR_PASS_DEPS ${IR_PASS_DEPS} fusion_group_pass)
140141
endif()
141142
cc_library(build_strategy SRCS build_strategy.cc DEPS pass_builder ${IR_PASS_DEPS})
143+
cc_test(build_strategy_test SRCS build_strategy_test.cc
144+
DEPS build_strategy op_registry op_proto_maker graph)
142145

143146
if (WITH_MKLDNN)
144147
target_link_libraries(build_strategy mkldnn_placement_pass)

paddle/fluid/framework/details/build_strategy.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ limitations under the License. */
2020
#include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h"
2121

2222
DECLARE_bool(use_mkldnn);
23+
DECLARE_bool(convert_all_blocks);
2324

2425
namespace paddle {
2526
namespace framework {
@@ -312,6 +313,11 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
312313
DeviceType use_device) const {
313314
#endif
314315
VLOG(1) << "apply all passes";
316+
if (FLAGS_convert_all_blocks) {
317+
PADDLE_ENFORCE_EQ(
318+
graph->IsMainGraph(), true,
319+
platform::errors::InvalidArgument("This graph is not main_graph"));
320+
}
315321
// Create a default one if not finalized by user.
316322
CreatePassesFromStrategy(false);
317323

@@ -432,7 +438,14 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
432438
}
433439
}
434440
VLOG(1) << "Start Apply Pass " << pass->Type();
435-
graph = pass->Apply(graph);
441+
if (FLAGS_convert_all_blocks) {
442+
for (size_t i = 0; i < graph->SubGraphsSize(); ++i) {
443+
VLOG(3) << "Apply Pass " << pass->Type() << " to SubGraph " << i;
444+
pass->Apply(graph->GetSubGraph(i));
445+
}
446+
} else {
447+
graph = pass->Apply(graph);
448+
}
436449
VLOG(1) << "Finish Apply Pass " << pass->Type();
437450
}
438451
VLOG(1) << "All Passes Applied";

paddle/fluid/framework/details/build_strategy.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ struct BuildStrategy {
100100
// while running.
101101
bool cache_runtime_context_{false};
102102

103+
// Fix the op run order.
104+
bool fix_op_run_order_{false};
105+
103106
// Operator fusion
104107
// TODO(dev-paddle): fuse_elewise_add_act_ops may cause some models have
105108
// cycle.

0 commit comments

Comments
 (0)