
Commit 7cdc1c4

Merge branch 'develop' into pad3d_op_npu

merge newest develop branch from PaddlePaddle/Paddle

2 parents caebfba + 4641e8f

315 files changed: +25480 additions, -2692 deletions


.pre-commit-config.yaml

Lines changed: 4 additions & 1 deletion

```diff
@@ -49,4 +49,7 @@ repos:
         entry: python ./tools/codestyle/copyright.hook
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py|sh)$
-        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
+        exclude: |
+            (?x)^(
+                paddle/utils/.*
+            )$
```

cmake/cupti.cmake

Lines changed: 1 addition & 0 deletions

```diff
@@ -9,6 +9,7 @@ find_path(CUPTI_INCLUDE_DIR cupti.h
         $ENV{CUPTI_ROOT} $ENV{CUPTI_ROOT}/include
         ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include
         ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/include
+        ${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux/include
   NO_DEFAULT_PATH
 )
```

Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+ext_tensor.cc
```

paddle/fluid/framework/CMakeLists.txt

Lines changed: 12 additions & 3 deletions

```diff
@@ -17,14 +17,15 @@ function(windows_symbolic TARGET)
     add_custom_command(OUTPUT ${final_path}/.${src}.cu
       COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
       COMMENT "create hidden file of ${src}.cu")
-    add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
+    add_custom_target(${TARGET} ALL DEPENDS ${final_path}/.${src}.cu)
   endforeach()
 endfunction()
 
 add_subdirectory(ir)
 add_subdirectory(details)
 add_subdirectory(fleet)
 add_subdirectory(io)
+add_subdirectory(new_executor)
 #ddim lib
 proto_library(framework_proto SRCS framework.proto)
@@ -413,8 +414,16 @@ include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
 include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include)
 include_directories(${PADDLE_SOURCE_DIR}/paddle/utils)
 
-if(WITH_ROCM)
-  hip_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)
+if (WITH_GPU)
+  if (WIN32)
+    windows_symbolic(ext_tensor_cu SRCS ext_tensor.cu PATH ../extension/src)
+    nv_library(custom_tensor SRCS ../extension/src/.ext_tensor.cu DEPS lod_tensor memory enforce)
+    add_dependencies(custom_tensor ext_tensor_cu)
+  else()
+    nv_library(custom_tensor SRCS ../extension/src/ext_tensor.cu DEPS lod_tensor memory enforce)
+  endif(WIN32)
+elseif (WITH_ROCM)
+  hip_library(custom_tensor SRCS ../extension/src/ext_tensor.cu DEPS lod_tensor memory enforce)
 else()
   cc_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)
 endif()
```

paddle/fluid/framework/block_desc.cc

Lines changed: 36 additions & 0 deletions

```diff
@@ -238,5 +238,41 @@ BlockDesc *BlockDesc::ForwardBlock() const {
   return prog_->MutableBlock(static_cast<size_t>(desc_->forward_block_idx()));
 }
 
+void BlockDesc::MoveFrom(BlockDesc *block) {
+  PADDLE_ENFORCE_NOT_NULL(
+      block, platform::errors::InvalidArgument("Block must be provided."));
+  if (this == block) {
+    return;
+  }
+
+  for (auto &pair : block->vars_) {
+    const auto &name = pair.first;
+    auto &var_ptr = pair.second;
+    auto &old_var_ptr = vars_[name];
+    if (old_var_ptr == nullptr) {
+      VLOG(10) << "Create new variable " << var_ptr->Name();
+      old_var_ptr = std::move(var_ptr);
+    } else {
+      // NOTE(zjl): cannot release old_var_ptr, because Python
+      // Variable holds the reference of the C++ VarDesc object.
+      // If the C++ VarDesc object is destructed, any call to the
+      // methods of Python Variable may raise segmentation fault.
+      VLOG(10) << "Update old variable " << var_ptr->Name();
+      *old_var_ptr = *var_ptr;
+    }
+  }
+  ops_.clear();
+  for (const auto &src_op : block->ops_) {
+    AppendOp()->CopyFrom(*src_op);
+  }
+  need_update_ = true;
+  Flush();
+
+  block->ops_.clear();
+  block->vars_.clear();
+  block->need_update_ = true;
+  block->Flush();
+}
+
 }  // namespace framework
 }  // namespace paddle
```

paddle/fluid/framework/block_desc.h

Lines changed: 2 additions & 0 deletions

```diff
@@ -111,6 +111,8 @@ class BlockDesc {
 
   ProgramDesc *Program() const { return this->prog_; }
 
+  void MoveFrom(BlockDesc *block);
+
  private:
   ProgramDesc *prog_;       // not_own
   proto::BlockDesc *desc_;  // not_own
```

paddle/fluid/framework/custom_operator.cc

Lines changed: 7 additions & 1 deletion

```diff
@@ -517,6 +517,12 @@ void RegisterOperatorWithMetaInfo(
   auto& base_op_meta = op_meta_infos.front();
 
   auto op_name = OpMetaInfoHelper::GetOpName(base_op_meta);
+
+  if (OpInfoMap::Instance().Has(op_name)) {
+    LOG(WARNING) << "Operator (" << op_name << ")has been registered.";
+    return;
+  }
+
   auto& op_inputs = OpMetaInfoHelper::GetInputs(base_op_meta);
   auto& op_outputs = OpMetaInfoHelper::GetOutputs(base_op_meta);
   auto& op_attrs = OpMetaInfoHelper::GetAttrs(base_op_meta);
@@ -867,7 +873,7 @@ void RegisterOperatorWithMetaInfoMap(
 // load op api
 void LoadOpMetaInfoAndRegisterOp(const std::string& dso_name) {
   void* handle = paddle::platform::dynload::GetOpDsoHandle(dso_name);
-
+  VLOG(1) << "load custom_op lib: " << dso_name;
   typedef OpMetaInfoMap& get_op_meta_info_map_t();
   auto* get_op_meta_info_map =
       detail::DynLoad<get_op_meta_info_map_t>(handle, "PD_GetOpMetaInfoMap");
```
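The added guard turns duplicate registration into a warning plus early return instead of a hard failure, which helps when the same custom-op shared library is loaded more than once. A rough sketch of the same idempotent-registry idea, using a hypothetical Registry class in place of Paddle's OpInfoMap:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>

struct OpInfo {};  // placeholder for operator metadata

// Hypothetical registry; Paddle's real OpInfoMap exposes a similar Has().
class Registry {
 public:
  bool Register(const std::string& name, OpInfo info) {
    if (map_.count(name)) {
      // Duplicate registration is tolerated, mirroring the
      // LOG(WARNING) + return added in RegisterOperatorWithMetaInfo.
      std::cerr << "Operator (" << name << ") has been registered.\n";
      return false;
    }
    map_.emplace(name, std::move(info));
    return true;
  }

 private:
  std::unordered_map<std::string, OpInfo> map_;
};

int main() {
  Registry reg;
  reg.Register("custom_relu", OpInfo{});
  reg.Register("custom_relu", OpInfo{});  // second load: warns, no crash
}
```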

paddle/fluid/framework/custom_tensor_test.cc

Lines changed: 7 additions & 0 deletions

```diff
@@ -144,6 +144,13 @@ void TestCast(paddle::DataType data_type) {
   t1.template mutable_data<T>();
   auto t2 = t1.cast(data_type);
   CHECK(t2.type() == data_type);
+#ifdef PADDLE_WITH_CUDA
+  auto tg1 = paddle::Tensor(paddle::PlaceType::kGPU);
+  tg1.reshape(tensor_shape);
+  tg1.template mutable_data<T>();
+  auto tg2 = tg1.cast(data_type);
+  CHECK(tg2.type() == data_type);
+#endif
 }
 
 void GroupTestCopy() {
```
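The new #ifdef block repeats the cast assertion on a GPU-placed tensor, so the CPU and GPU code paths are checked against identical expectations. A toy illustration of that run-the-same-check-per-placement pattern — the Tensor, Place, and DType types below are invented stand-ins, not Paddle's API:

```cpp
#include <cassert>

// Minimal stand-ins, only to show the shape of the test.
enum class Place { kCPU, kGPU };
enum class DType { kFloat32, kFloat64 };

struct Tensor {
  Place place;
  DType dtype{DType::kFloat32};
  Tensor cast(DType to) const { return Tensor{place, to}; }
};

// One assertion body, run once per placement, mirroring how TestCast
// now covers both the host and the device tensor.
void TestCastOn(Place place) {
  Tensor t{place};
  Tensor casted = t.cast(DType::kFloat64);
  assert(casted.dtype == DType::kFloat64);  // like CHECK(t2.type() == ...)
}

int main() {
  TestCastOn(Place::kCPU);
#ifdef PADDLE_WITH_CUDA  // the real test guards the GPU branch the same way
  TestCastOn(Place::kGPU);
#endif
}
```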

paddle/fluid/framework/details/build_strategy.cc

Lines changed: 3 additions & 3 deletions

```diff
@@ -36,8 +36,8 @@ static inline bool SeqOnlyAllReduceOps(const BuildStrategy &strategy) {
          !strategy.enable_parallel_graph_;
 }
 
-static inline void ConvertDefaultValue(boost::optional<bool> *default_value) {
-  if (*default_value == boost::none) {
+static inline void ConvertDefaultValue(paddle::optional<bool> *default_value) {
+  if (*default_value == paddle::none) {
     *default_value = true;
   }
 }
@@ -247,7 +247,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
     }
   }
 
-  void AppendPassWithCheck(const boost::optional<bool> &append_pass,
+  void AppendPassWithCheck(const paddle::optional<bool> &append_pass,
                            const std::string &pass_name) {
     AppendPassWithCheck(append_pass == true, pass_name);
   }
```
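Both hunks swap boost::optional for paddle::optional while preserving the tri-state semantics: an unset flag means "choose the default", and ConvertDefaultValue collapses unset to true. The same behavior sketched with std::optional as a stand-in (paddle::optional is Paddle's own type; only the logic is being illustrated):

```cpp
#include <cassert>
#include <optional>

// Stand-in for ConvertDefaultValue: an unset flag becomes true,
// an explicitly set flag is left alone.
static inline void ConvertDefaultValue(std::optional<bool>* default_value) {
  if (!default_value->has_value()) {
    *default_value = true;
  }
}

int main() {
  std::optional<bool> unset;          // tri-state: unset / true / false
  std::optional<bool> forced{false};  // user explicitly disabled the pass
  ConvertDefaultValue(&unset);
  ConvertDefaultValue(&forced);
  assert(unset == true);    // unset collapsed to the default
  assert(forced == false);  // explicit choice preserved
}
```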

paddle/fluid/framework/details/build_strategy.h

Lines changed: 9 additions & 4 deletions

```diff
@@ -112,16 +112,16 @@ struct BuildStrategy {
   bool enable_auto_fusion_{false};
   // Fuse_all_optimizer_ops and fuse_all_reduce_ops require that gradients
   // should not be sparse types
-  boost::optional<bool> fuse_all_optimizer_ops_{false};
-  boost::optional<bool> fuse_all_reduce_ops_{boost::none};
+  paddle::optional<bool> fuse_all_optimizer_ops_{false};
+  paddle::optional<bool> fuse_all_reduce_ops_{boost::none};
   // fuse_relu_depthwise_conv can fuse the `relu ->
   // depthwise_conv`
   bool fuse_relu_depthwise_conv_{false};
   // NOTE(zcd): In reduce mode, fusing broadcast ops may make the program
   // faster. Because fusing broadcast OP equals delaying the execution of all
   // broadcast Ops, in this case, all nccl streams are used only for reduce
   // operations for a period of time.
-  boost::optional<bool> fuse_broadcast_ops_{boost::none};
+  paddle::optional<bool> fuse_broadcast_ops_{boost::none};
   // replace batch_norm with sync_batch_norm.
   bool sync_batch_norm_{false};
 
@@ -135,7 +135,7 @@ struct BuildStrategy {
   // By default, memory_optimize would be opened if gc is disabled, and
   // be closed if gc is enabled.
   // Users can forcely enable/disable memory_optimize by setting True/False.
-  boost::optional<bool> memory_optimize_{boost::none};
+  paddle::optional<bool> memory_optimize_{boost::none};
 
   // Turn on inplace by default.
   bool enable_inplace_{true};
@@ -180,6 +180,11 @@ struct BuildStrategy {
 
   bool IsFinalized() const { return is_finalized_; }
 
+  void ClearFinalized() {
+    pass_builder_ = nullptr;
+    is_finalized_ = false;
+  }
+
   bool IsMultiDevPass(const std::string &pass_name) const;
 
   // Apply the passes built by the pass_builder_. The passes will be
```
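ClearFinalized is the inverse of finalizing a BuildStrategy: it drops the built pass pipeline and lets the strategy be mutated and finalized again. A toy version of that finalize/reset lifecycle, with invented PassBuilder and Strategy types (the real pass_builder_ holds an ir::PassBuilder):

```cpp
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in for ir::PassBuilder.
struct PassBuilder {
  std::vector<std::string> passes;
};

// Mini BuildStrategy showing only the finalize/reset pair.
class Strategy {
 public:
  bool IsFinalized() const { return is_finalized_; }

  void Finalize() {
    pass_builder_ = std::make_unique<PassBuilder>();
    is_finalized_ = true;
  }

  // Mirrors the added ClearFinalized(): discard the pipeline so the
  // strategy can be modified and finalized again.
  void ClearFinalized() {
    pass_builder_ = nullptr;
    is_finalized_ = false;
  }

 private:
  std::unique_ptr<PassBuilder> pass_builder_;
  bool is_finalized_{false};
};

int main() {
  Strategy s;
  s.Finalize();
  s.ClearFinalized();      // strategy is editable again
  return s.IsFinalized();  // 0: cleared successfully
}
```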

0 commit comments

Comments
 (0)