
Commit 7cdc1c4

Merge branch 'develop' into pad3d_op_npu

merge newest develop branch from PaddlePaddle/Paddle

2 parents caebfba + 4641e8f

315 files changed: +25480 additions, -2692 deletions


.pre-commit-config.yaml

Lines changed: 4 additions & 1 deletion

```diff
@@ -49,4 +49,7 @@ repos:
         entry: python ./tools/codestyle/copyright.hook
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py|sh)$
-        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
+        exclude: |
+            (?x)^(
+                paddle/utils/.*
+            )$
```

cmake/cupti.cmake

Lines changed: 1 addition & 0 deletions

```diff
@@ -9,6 +9,7 @@ find_path(CUPTI_INCLUDE_DIR cupti.h
         $ENV{CUPTI_ROOT} $ENV{CUPTI_ROOT}/include
         ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include
         ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/include
+        ${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux/include
   NO_DEFAULT_PATH
 )
```

Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+ext_tensor.cc
```

paddle/fluid/framework/CMakeLists.txt

Lines changed: 12 additions & 3 deletions

```diff
@@ -17,14 +17,15 @@ function(windows_symbolic TARGET)
     add_custom_command(OUTPUT ${final_path}/.${src}.cu
       COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
       COMMENT "create hidden file of ${src}.cu")
-    add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
+    add_custom_target(${TARGET} ALL DEPENDS ${final_path}/.${src}.cu)
   endforeach()
 endfunction()
 
 add_subdirectory(ir)
 add_subdirectory(details)
 add_subdirectory(fleet)
 add_subdirectory(io)
+add_subdirectory(new_executor)
 #ddim lib
 proto_library(framework_proto SRCS framework.proto)
@@ -413,8 +414,16 @@ include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
 include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include)
 include_directories(${PADDLE_SOURCE_DIR}/paddle/utils)
 
-if(WITH_ROCM)
-  hip_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)
+if (WITH_GPU)
+  if (WIN32)
+    windows_symbolic(ext_tensor_cu SRCS ext_tensor.cu PATH ../extension/src)
+    nv_library(custom_tensor SRCS ../extension/src/.ext_tensor.cu DEPS lod_tensor memory enforce)
+    add_dependencies(custom_tensor ext_tensor_cu)
+  else()
+    nv_library(custom_tensor SRCS ../extension/src/ext_tensor.cu DEPS lod_tensor memory enforce)
+  endif(WIN32)
+elseif (WITH_ROCM)
+  hip_library(custom_tensor SRCS ../extension/src/ext_tensor.cu DEPS lod_tensor memory enforce)
 else()
   cc_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)
 endif()
```

paddle/fluid/framework/block_desc.cc

Lines changed: 36 additions & 0 deletions

```diff
@@ -238,5 +238,41 @@ BlockDesc *BlockDesc::ForwardBlock() const {
   return prog_->MutableBlock(static_cast<size_t>(desc_->forward_block_idx()));
 }
 
+void BlockDesc::MoveFrom(BlockDesc *block) {
+  PADDLE_ENFORCE_NOT_NULL(
+      block, platform::errors::InvalidArgument("Block must be provided."));
+  if (this == block) {
+    return;
+  }
+
+  for (auto &pair : block->vars_) {
+    const auto &name = pair.first;
+    auto &var_ptr = pair.second;
+    auto &old_var_ptr = vars_[name];
+    if (old_var_ptr == nullptr) {
+      VLOG(10) << "Create new variable " << var_ptr->Name();
+      old_var_ptr = std::move(var_ptr);
+    } else {
+      // NOTE(zjl): cannot release old_var_ptr, because Python
+      // Variable holds the reference of the C++ VarDesc object.
+      // If the C++ VarDesc object is destructed, any call to the
+      // methods of Python Variable may raise segmentation fault.
+      VLOG(10) << "Update old variable " << var_ptr->Name();
+      *old_var_ptr = *var_ptr;
+    }
+  }
+  ops_.clear();
+  for (const auto &src_op : block->ops_) {
+    AppendOp()->CopyFrom(*src_op);
+  }
+  need_update_ = true;
+  Flush();
+
+  block->ops_.clear();
+  block->vars_.clear();
+  block->need_update_ = true;
+  block->Flush();
+}
+
 }  // namespace framework
 }  // namespace paddle
```

paddle/fluid/framework/block_desc.h

Lines changed: 2 additions & 0 deletions

```diff
@@ -111,6 +111,8 @@ class BlockDesc {
 
   ProgramDesc *Program() const { return this->prog_; }
 
+  void MoveFrom(BlockDesc *block);
+
  private:
   ProgramDesc *prog_;       // not_own
   proto::BlockDesc *desc_;  // not_own
```

paddle/fluid/framework/custom_operator.cc

Lines changed: 7 additions & 1 deletion

```diff
@@ -517,6 +517,12 @@ void RegisterOperatorWithMetaInfo(
   auto& base_op_meta = op_meta_infos.front();
 
   auto op_name = OpMetaInfoHelper::GetOpName(base_op_meta);
+
+  if (OpInfoMap::Instance().Has(op_name)) {
+    LOG(WARNING) << "Operator (" << op_name << ")has been registered.";
+    return;
+  }
+
   auto& op_inputs = OpMetaInfoHelper::GetInputs(base_op_meta);
   auto& op_outputs = OpMetaInfoHelper::GetOutputs(base_op_meta);
   auto& op_attrs = OpMetaInfoHelper::GetAttrs(base_op_meta);
@@ -867,7 +873,7 @@ void RegisterOperatorWithMetaInfoMap(
 // load op api
 void LoadOpMetaInfoAndRegisterOp(const std::string& dso_name) {
   void* handle = paddle::platform::dynload::GetOpDsoHandle(dso_name);
-
+  VLOG(1) << "load custom_op lib: " << dso_name;
   typedef OpMetaInfoMap& get_op_meta_info_map_t();
   auto* get_op_meta_info_map =
       detail::DynLoad<get_op_meta_info_map_t>(handle, "PD_GetOpMetaInfoMap");
```
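The added guard turns duplicate registration into a warning plus early return instead of a hard failure, which helps when the same custom-op shared library is loaded more than once. A rough sketch of the same idempotent-registry idea, using a hypothetical Registry class in place of Paddle's OpInfoMap:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>

struct OpInfo {};  // placeholder for operator metadata

// Hypothetical registry; Paddle's real OpInfoMap exposes a similar Has().
class Registry {
 public:
  bool Register(const std::string& name, OpInfo info) {
    if (map_.count(name)) {
      // Duplicate registration is tolerated, mirroring the
      // LOG(WARNING) + return added in RegisterOperatorWithMetaInfo.
      std::cerr << "Operator (" << name << ") has been registered.\n";
      return false;
    }
    map_.emplace(name, std::move(info));
    return true;
  }

 private:
  std::unordered_map<std::string, OpInfo> map_;
};

int main() {
  Registry reg;
  reg.Register("custom_relu", OpInfo{});
  reg.Register("custom_relu", OpInfo{});  // second load: warns, no crash
}
```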

paddle/fluid/framework/custom_tensor_test.cc

Lines changed: 7 additions & 0 deletions

```diff
@@ -144,6 +144,13 @@ void TestCast(paddle::DataType data_type) {
   t1.template mutable_data<T>();
   auto t2 = t1.cast(data_type);
   CHECK(t2.type() == data_type);
+#ifdef PADDLE_WITH_CUDA
+  auto tg1 = paddle::Tensor(paddle::PlaceType::kGPU);
+  tg1.reshape(tensor_shape);
+  tg1.template mutable_data<T>();
+  auto tg2 = tg1.cast(data_type);
+  CHECK(tg2.type() == data_type);
+#endif
 }
 
 void GroupTestCopy() {
```
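The new #ifdef block repeats the cast assertion on a GPU-placed tensor, so the CPU and GPU code paths are checked against identical expectations. A toy illustration of that run-the-same-check-per-placement pattern — the Tensor, Place, and DType types below are invented stand-ins, not Paddle's API:

```cpp
#include <cassert>

// Minimal stand-ins, only to show the shape of the test.
enum class Place { kCPU, kGPU };
enum class DType { kFloat32, kFloat64 };

struct Tensor {
  Place place;
  DType dtype{DType::kFloat32};
  Tensor cast(DType to) const { return Tensor{place, to}; }
};

// One assertion body, run once per placement, mirroring how TestCast
// now covers both the host and the device tensor.
void TestCastOn(Place place) {
  Tensor t{place};
  Tensor casted = t.cast(DType::kFloat64);
  assert(casted.dtype == DType::kFloat64);  // like CHECK(t2.type() == ...)
}

int main() {
  TestCastOn(Place::kCPU);
#ifdef PADDLE_WITH_CUDA  // the real test guards the GPU branch the same way
  TestCastOn(Place::kGPU);
#endif
}
```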

paddle/fluid/framework/details/build_strategy.cc

Lines changed: 3 additions & 3 deletions

```diff
@@ -36,8 +36,8 @@ static inline bool SeqOnlyAllReduceOps(const BuildStrategy &strategy) {
          !strategy.enable_parallel_graph_;
 }
 
-static inline void ConvertDefaultValue(boost::optional<bool> *default_value) {
-  if (*default_value == boost::none) {
+static inline void ConvertDefaultValue(paddle::optional<bool> *default_value) {
+  if (*default_value == paddle::none) {
     *default_value = true;
   }
 }
@@ -247,7 +247,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
     }
   }
 
-  void AppendPassWithCheck(const boost::optional<bool> &append_pass,
+  void AppendPassWithCheck(const paddle::optional<bool> &append_pass,
                            const std::string &pass_name) {
     AppendPassWithCheck(append_pass == true, pass_name);
   }
```
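Both hunks swap boost::optional for paddle::optional while preserving the tri-state semantics: an unset flag means "choose the default", and ConvertDefaultValue collapses unset to true. The same behavior sketched with std::optional as a stand-in (paddle::optional is Paddle's own type; only the logic is being illustrated):

```cpp
#include <cassert>
#include <optional>

// Stand-in for ConvertDefaultValue: an unset flag becomes true,
// an explicitly set flag is left alone.
static inline void ConvertDefaultValue(std::optional<bool>* default_value) {
  if (!default_value->has_value()) {
    *default_value = true;
  }
}

int main() {
  std::optional<bool> unset;          // tri-state: unset / true / false
  std::optional<bool> forced{false};  // user explicitly disabled the pass
  ConvertDefaultValue(&unset);
  ConvertDefaultValue(&forced);
  assert(unset == true);    // unset collapsed to the default
  assert(forced == false);  // explicit choice preserved
}
```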

paddle/fluid/framework/details/build_strategy.h

Lines changed: 9 additions & 4 deletions

```diff
@@ -112,16 +112,16 @@ struct BuildStrategy {
   bool enable_auto_fusion_{false};
   // Fuse_all_optimizer_ops and fuse_all_reduce_ops require that gradients
   // should not be sparse types
-  boost::optional<bool> fuse_all_optimizer_ops_{false};
-  boost::optional<bool> fuse_all_reduce_ops_{boost::none};
+  paddle::optional<bool> fuse_all_optimizer_ops_{false};
+  paddle::optional<bool> fuse_all_reduce_ops_{boost::none};
   // fuse_relu_depthwise_conv can fuse the `relu ->
   // depthwise_conv`
   bool fuse_relu_depthwise_conv_{false};
   // NOTE(zcd): In reduce mode, fusing broadcast ops may make the program
   // faster. Because fusing broadcast OP equals delaying the execution of all
   // broadcast Ops, in this case, all nccl streams are used only for reduce
   // operations for a period of time.
-  boost::optional<bool> fuse_broadcast_ops_{boost::none};
+  paddle::optional<bool> fuse_broadcast_ops_{boost::none};
   // replace batch_norm with sync_batch_norm.
   bool sync_batch_norm_{false};
 
@@ -135,7 +135,7 @@ struct BuildStrategy {
   // By default, memory_optimize would be opened if gc is disabled, and
   // be closed if gc is enabled.
   // Users can forcely enable/disable memory_optimize by setting True/False.
-  boost::optional<bool> memory_optimize_{boost::none};
+  paddle::optional<bool> memory_optimize_{boost::none};
 
   // Turn on inplace by default.
   bool enable_inplace_{true};
@@ -180,6 +180,11 @@ struct BuildStrategy {
 
   bool IsFinalized() const { return is_finalized_; }
 
+  void ClearFinalized() {
+    pass_builder_ = nullptr;
+    is_finalized_ = false;
+  }
+
   bool IsMultiDevPass(const std::string &pass_name) const;
 
   // Apply the passes built by the pass_builder_. The passes will be
```
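ClearFinalized is the inverse of finalizing a BuildStrategy: it drops the built pass pipeline and lets the strategy be mutated and finalized again. A toy version of that finalize/reset lifecycle, with invented PassBuilder and Strategy types (the real pass_builder_ holds an ir::PassBuilder):

```cpp
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in for ir::PassBuilder.
struct PassBuilder {
  std::vector<std::string> passes;
};

// Mini BuildStrategy showing only the finalize/reset pair.
class Strategy {
 public:
  bool IsFinalized() const { return is_finalized_; }

  void Finalize() {
    pass_builder_ = std::make_unique<PassBuilder>();
    is_finalized_ = true;
  }

  // Mirrors the added ClearFinalized(): discard the pipeline so the
  // strategy can be modified and finalized again.
  void ClearFinalized() {
    pass_builder_ = nullptr;
    is_finalized_ = false;
  }

 private:
  std::unique_ptr<PassBuilder> pass_builder_;
  bool is_finalized_{false};
};

int main() {
  Strategy s;
  s.Finalize();
  s.ClearFinalized();      // strategy is editable again
  return s.IsFinalized();  // 0: cleared successfully
}
```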

0 commit comments

Comments
 (0)