
Commit 7ba85ac

Add inner register backward hook method for Tensor (#32171)
* add register backward hook method
* add leaf grad accumulated test
1 parent f3e49c4 commit 7ba85ac

9 files changed: 280 additions, 135 deletions

paddle/fluid/imperative/basic_engine.cc

Lines changed: 5 additions & 5 deletions
@@ -284,15 +284,15 @@ static std::shared_ptr<NameVarMap<VariableWrapper>> CallGradientHooks(
   for (const auto& pair : bwd_ins) {
     for (size_t i = 0; i < pair.second.size(); ++i) {
       auto& var = pair.second[i];
-      if (var->HasHook()) {
+      if (var->HasVariableWrapperHook()) {
         if (tmp_ins_ptr == nullptr) {
           tmp_ins_ptr = std::make_shared<NameVarMap<VariableWrapper>>(bwd_ins);
         }
-        VLOG(3) << "Call " << var->GetHooks().size() << " hooks of " << op_type
-                << "'s input `" << pair.first << "`'s var `" << var->Name()
-                << "`.";
+        VLOG(3) << "Call " << var->GetVariableWrapperHooks().size()
+                << " hooks of " << op_type << "'s input `" << pair.first
+                << "`'s var `" << var->Name() << "`.";
         auto tmp_var = var;
-        for (const auto& hook_pair : var->GetHooks()) {
+        for (const auto& hook_pair : var->GetVariableWrapperHooks()) {
           tmp_var = (*hook_pair.second)(tmp_var);
         }
         (*tmp_ins_ptr)[pair.first][i] = tmp_var;
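
The chaining contract is unchanged by the rename: hooks run in ascending id order, and each hook consumes the output of the previous one. Below is a minimal standalone sketch of that loop, assuming a hypothetical Var struct in place of VariableWrapper (not Paddle code):

#include <cstdint>
#include <functional>
#include <map>
#include <memory>

struct Var {
  float value;
};  // hypothetical stand-in for VariableWrapper

using VarHook =
    std::function<std::shared_ptr<Var>(const std::shared_ptr<Var>&)>;

// Mirrors the loop above: tmp_var = (*hook_pair.second)(tmp_var);
std::shared_ptr<Var> RunHooks(const std::map<int64_t, VarHook>& hooks,
                              std::shared_ptr<Var> var) {
  for (const auto& hook_pair : hooks) {
    var = hook_pair.second(var);  // each hook may return a new Var
  }
  return var;
}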

paddle/fluid/imperative/gradient_accumulator.cc

Lines changed: 7 additions & 7 deletions
@@ -467,14 +467,14 @@ void GradientAccumulator::CallGradientHooks() {
                     platform::errors::PreconditionNotMet("Leaf Tensor's inner var "
                                                          "is not initialized when "
                                                          "call gradient hook."));
-  if (var_->HasHook()) {
-    VLOG(3) << "Call " << var_->GetHooks().size()
+  if (var_->HasVariableWrapperHook()) {
+    VLOG(3) << "Call " << var_->GetVariableWrapperHooks().size()
             << " hooks of leaf gradient accumulator's inner var `"
             << var_->Name() << "`.";
     auto tmp_var = inner_var_;
     VLOG(3) << "Input var " << var_->Name() << "'s hook size - "
-            << var_->GetHooks().size();
-    for (const auto& hook_pair : var_->GetHooks()) {
+            << var_->GetVariableWrapperHooks().size();
+    for (const auto& hook_pair : var_->GetVariableWrapperHooks()) {
       tmp_var = (*hook_pair.second)(tmp_var);
     }
     inner_var_ = tmp_var;
@@ -495,10 +495,10 @@ void GradientAccumulator::CallReduceHooks() {
                     "Only can call reduce hooks after the "
                     "gradient accumulation is completed in "
                     "current batch or across batchs."));
-  if (var_->HasMutableHook()) {
-    for (const auto& hook : var_->GetMutableHooks()) {
+  if (var_->HasVoidHook()) {
+    for (const auto& hook : var_->GetVoidHooks()) {
       VLOG(3) << "call gradient accumulator backward hooks.";
-      (*hook)(var_);
+      (*hook)();
     }
   }
 }
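
For leaf tensors, ordering is the point: VariableWrapper hooks rebind inner_var_ after accumulation finishes, and only then do the no-argument reduce hooks fire. A simplified model of that sequencing, reusing the hypothetical Var and VarHook stand-ins from the sketch above:

#include <vector>

struct LeafAccumulator {  // simplified model, not the Paddle class
  std::shared_ptr<Var> inner_var;
  std::map<int64_t, VarHook> var_hooks;
  std::vector<std::shared_ptr<std::function<void()>>> void_hooks;

  void Finish() {
    // 1. gradient hooks transform the accumulated value
    for (const auto& hook_pair : var_hooks) {
      inner_var = hook_pair.second(inner_var);
    }
    // 2. void hooks only signal completion: (*hook)() instead of (*hook)(var_)
    for (const auto& hook : void_hooks) {
      (*hook)();
    }
  }
};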

paddle/fluid/imperative/hooks.h

Lines changed: 27 additions & 37 deletions
@@ -23,32 +23,34 @@ namespace imperative {
 
 class VariableWrapper;
 
-/** [ Const VariableWrapper Hook: Pre hook functor of OpBase ]
+/** [ VariableWrapper Hook ]
  *
- * @brief This hook functor is executed before the grad OpBase is executed,
- *        taking the input of the current grad OpBase as input, and
- *        executing python hooks (user-defined) or C++ hooks (developer-defined)
- *        to achieve the purpose of custom operations on the interior VarBase
- *        gradient.
+ * @brief This hook functor is executed before the grad OpBase is executed or
+ *        after gradient accumulation completed in current batch.
+ *        1. For interior var, VariableWrapper Hook take the input of the
+ *           current grad OpBase as input.
+ *        2. For leaf var, VariableWrapper Hook take the inner_var_ of
+ *           GradientAccumulator as input.
  *
- * @note This hook functor will not change the input gradient VarBase.
+ * @note This hook functor will not change the input gradient VariableWrapper,
+ *       but if you copy the input VariableWrapper and change the value of
+ *       Variable in VariableWrapper, the value of input will also be changed,
+ *       because they shared same PlaceHolder.
  *
- * @note [Why need to be OpBase `PreHook`, why not `PostHook`?]
+ * @note [ Why need to be OpBase `PreHook`, why not `PostHook`? ]
  *
- * 1. We expect If set OpBase post hook, when the op executed end, the
+ *       If set OpBase post hook, when the op executed end, the
  *       op's output gradient may not be the final state, because it may need
  *       other op's gradient output to accumulated to it. But before op can
  *       be executed, the gradient output must have been accumulated to final
  *       value.
- * 2. We don't want the hook to change its input Tensor value, so now
-  *    we can't call all hooks in GradAccumulator.
  *
- * @note [Why only can be used for interior VarBase?]
+ * @note [ Why Leaf gradient is special? ]
  *
  *       Because the leaf VarBase's GradVarBase has no GradOpNode, so leaf
  *       GradVarBase has no next OpBase to executed, so if need to deal with
- *       the leaf GradVarBase, cannot use this hook functor. For this case, we
- *       deal with by other inplace hook method.
+ *       the leaf GradVarBase, we should call hooks after gradient accumulation
+ *       completed.
  */
 class VariableWrapperHook {
  public:
@@ -57,34 +59,22 @@ class VariableWrapperHook {
       const std::shared_ptr<VariableWrapper>& var) = 0;
 };
 
-/** [ Inplace VariableWrapper Hook: Post hook functor of GradAccumulator ]
- *
- * @brief This hook functor is the Hook that operates on the current
- *        gradient after the GradientAccumulator has accumulated the gradient.
- *        Leaf GradVarBase has no next OpBase, if we want to register hook
- *        for it, we also need to wait until the leaf GradVarBase accumulation
- *        is completed, so we can add post hook to GradientAccumulator.
- *
- * @note This hook functor will change the grad VarBase value.
- *
- * @note Only allow leaf VarBase hold call this hook functor.
- */
-class InplaceVariableWrapperHook {
- public:
-  virtual ~InplaceVariableWrapperHook() = default;
-  virtual void operator()(VariableWrapper* var) = 0;
-};
-
-class LambdaInplaceVariableWrapperHook : public InplaceVariableWrapperHook {
+class CppVariableWrapperHook : public VariableWrapperHook {
  public:
-  explicit LambdaInplaceVariableWrapperHook(
-      std::function<void(VariableWrapper*)>&& fn)
+  explicit CppVariableWrapperHook(
+      std::function<std::shared_ptr<VariableWrapper>(
+          const std::shared_ptr<VariableWrapper>&)>&& fn)
       : fn_(std::move(fn)) {}
 
-  void operator()(VariableWrapper* var) override { fn_(var); }
+  std::shared_ptr<VariableWrapper> operator()(
+      const std::shared_ptr<VariableWrapper>& var) override {
+    return fn_(var);
+  }
 
  private:
-  std::function<void(VariableWrapper*)> fn_;
+  std::function<std::shared_ptr<VariableWrapper>(
+      const std::shared_ptr<VariableWrapper>&)>
+      fn_;
 };
 
 }  // namespace imperative
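
With the inplace hook classes removed, C++ callers wrap a lambda in CppVariableWrapperHook and return either the input unchanged or a freshly built VariableWrapper. A usage sketch, assuming a full Paddle build (the identity lambda is only illustrative):

#include "paddle/fluid/imperative/hooks.h"

using paddle::imperative::CppVariableWrapperHook;
using paddle::imperative::VariableWrapper;

auto hook = std::make_shared<CppVariableWrapperHook>(
    [](const std::shared_ptr<VariableWrapper>& var)
        -> std::shared_ptr<VariableWrapper> {
      // Return `var` unchanged, or build a new VariableWrapper holding the
      // transformed gradient, as DoubleHook does in test_hooks.cc below.
      return var;
    });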

paddle/fluid/imperative/layer.h

Lines changed: 12 additions & 10 deletions
@@ -226,23 +226,25 @@ class VarBase {
   void BumpInplaceVersion();
 
   /* Hook related method: now only used for GradVarBase */
-  bool HasHook() const { return var_->HasHook(); }
+  bool HasVariableWrapperHook() const { return var_->HasVariableWrapperHook(); }
 
-  int64_t AddHook(std::shared_ptr<VariableWrapperHook>&& hook) {
-    return var_->AddHook(
+  int64_t AddVariableWrapperHook(std::shared_ptr<VariableWrapperHook>&& hook) {
+    return var_->AddVariableWrapperHook(
         std::forward<std::shared_ptr<VariableWrapperHook>>(hook));
   }
 
-  bool RemoveHook(const int64_t& hook_id) { return var_->RemoveHook(hook_id); }
+  bool RemoveVariableWrapperHook(const int64_t& hook_id) {
+    return var_->RemoveVariableWrapperHook(hook_id);
+  }
 
-  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>& GetHooks()
-      const {
-    return var_->GetHooks();
+  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>&
+  GetVariableWrapperHooks() const {
+    return var_->GetVariableWrapperHooks();
   }
 
-  void AddMutableHook(std::shared_ptr<InplaceVariableWrapperHook>&& hook) {
-    var_->AddMutableHook(
-        std::forward<std::shared_ptr<InplaceVariableWrapperHook>>(hook));
+  void AddVoidHook(std::shared_ptr<std::function<void()>>&& hook) {
+    var_->AddVoidHook(
+        std::forward<std::shared_ptr<std::function<void()>>>(hook));
   }
 
  private:
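
Taken together, the VarBase surface now separates removable functor hooks, which return an id, from fire-and-forget void hooks. A usage sketch, assuming a Paddle build, an existing std::shared_ptr<VarBase> x with a grad var, and the DoubleHook helper defined in test_hooks.cc below:

// Removable functor hook: AddVariableWrapperHook returns an id.
int64_t id = x->GradVarBase()->AddVariableWrapperHook(
    std::make_shared<imperative::CppVariableWrapperHook>(DoubleHook));

// Fire-and-forget void hook: no arguments, no removal.
x->GradVarBase()->AddVoidHook(std::make_shared<std::function<void()>>(
    []() { /* e.g. mark this grad as ready */ }));

// Returns false if the id is unknown or already removed.
bool removed = x->GradVarBase()->RemoveVariableWrapperHook(id);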

paddle/fluid/imperative/reducer.cc

Lines changed: 2 additions & 3 deletions
@@ -310,9 +310,8 @@ Reducer::Reducer(const std::vector<std::shared_ptr<imperative::VarBase>> &vars,
   for (size_t global_var_index = 0; global_var_index < vars_.size();
        ++global_var_index) {
     auto var = vars_[global_var_index];
-    var->GradVarBase()->AddMutableHook(
-        std::make_shared<LambdaInplaceVariableWrapperHook>([=](
-            VariableWrapper *grad) { this->AddDistHook(global_var_index); }));
+    var->GradVarBase()->AddVoidHook(std::make_shared<std::function<void()>>(
+        [=]() { this->AddDistHook(global_var_index); }));
     var_index_map_[var->GradVarBase()->SharedVar().get()] = global_var_index;
   }
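
The reducer needs only a completion signal per parameter, not the gradient value, so std::function<void()> suffices and the variable index travels in the lambda capture. A standalone toy model of that registration, with no Paddle dependency (names are hypothetical):

#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <vector>

int main() {
  std::vector<std::shared_ptr<std::function<void()>>> hooks;
  for (size_t global_var_index = 0; global_var_index < 3; ++global_var_index) {
    // capture the index by value, as the [=] capture above does
    hooks.push_back(std::make_shared<std::function<void()>>(
        [global_var_index]() {
          std::cout << "var " << global_var_index << " grad done\n";
        }));
  }
  // backward completion would trigger these one by one
  for (const auto& hook : hooks) (*hook)();
}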

paddle/fluid/imperative/tests/test_hooks.cc

Lines changed: 46 additions & 20 deletions
@@ -37,6 +37,30 @@ namespace imperative {
 using vb_vector = std::vector<std::shared_ptr<imperative::VarBase>>;
 using var_pair = std::pair<std::string, vb_vector>;
 
+std::shared_ptr<imperative::VariableWrapper> DoubleHook(
+    const std::shared_ptr<imperative::VariableWrapper>& var) {
+  // 1. create out var
+  auto out_var = std::make_shared<imperative::VariableWrapper>(var->Name());
+  out_var->SetType(var->Type());
+  out_var->SetDataType(var->DataType());
+  out_var->SetForwardDataType(var->ForwardDataType());
+  out_var->InnerSetOverridedStopGradient(var->InnerOverridedStopGradient());
+
+  // 2. get input and output var's tensor
+  auto* out_tensor = out_var->MutableVar()->GetMutable<framework::LoDTensor>();
+  auto& tensor = var->Var().Get<framework::LoDTensor>();
+  out_tensor->Resize(tensor.dims());
+
+  // 3. double calc
+  auto* data = tensor.data<float>();
+  auto* out_data = out_tensor->mutable_data<float>(platform::CPUPlace());
+  for (int64_t i = 0; i < out_tensor->numel(); ++i) {
+    out_data[i] = data[i] * 2.0;
+  }
+
+  return out_var;
+}
+
 TEST(TestHooks, TestGradVarLeafBackwardHook) {
   // 1. prepare
   Tracer tracer;
@@ -73,16 +97,14 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) {
   framework::AttributeMap mul_attr_map;
   mul_attr_map["use_mkldnn"] = false;
 
-  // add GradAccumulatorPostHook
-  x->GradVarBase()->AddMutableHook(
-      std::make_shared<LambdaInplaceVariableWrapperHook>(
-          [=](VariableWrapper* grad) {
-            auto* grad_tensor =
-                grad->MutableVar()->GetMutable<framework::LoDTensor>();
-            for (int i = 0; i < grad_tensor->numel(); ++i) {
-              grad_tensor->mutable_data<float>(place)[i] *= 2.0;
-            }
-          }));
+  // add VariableWrapper hook
+  x->GradVarBase()->AddVariableWrapperHook(
+      std::make_shared<imperative::CppVariableWrapperHook>(DoubleHook));
+
+  // add Void hook
+  int64_t hook_value = 0;
+  x->GradVarBase()->AddVoidHook(
+      std::make_shared<std::function<void()>>([&]() { hook_value = 10; }));
 
   // 2. forward
   tracer.TraceOp("mul", ins, outs, mul_attr_map, place, true);
@@ -98,12 +120,15 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) {
   engine.Init(tensors, grad_tensors);
   engine.Execute();
 
+  // verify VariableWrapper hook result
   framework::LoDTensor x_grad;
   framework::TensorCopySync(x->GradVar().Get<framework::LoDTensor>(), place,
                             &x_grad);
   for (int i = 0; i < x_grad.numel(); ++i) {
     ASSERT_EQ(x_grad.data<float>()[i], 8.0);
   }
+  // verify Void hook result
+  ASSERT_EQ(hook_value, 10);
 
   framework::LoDTensor y_grad;
   framework::TensorCopySync(y->GradVar().Get<framework::LoDTensor>(), place,
@@ -152,16 +177,14 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() {
   memory::Copy(place, mutable_z, place, src_data.data(),
                sizeof(float) * src_data.size());
 
-  // add ReduceBackwardHook
-  x->GradVarBase()->AddMutableHook(
-      std::make_shared<LambdaInplaceVariableWrapperHook>(
-          [=](VariableWrapper* grad) {
-            auto* grad_tensor =
-                grad->MutableVar()->GetMutable<framework::LoDTensor>();
-            for (int i = 0; i < grad_tensor->numel(); ++i) {
-              grad_tensor->mutable_data<float>(place)[i] *= 2.0;
-            }
-          }));
+  // add VariableWrapper hook
+  x->GradVarBase()->AddVariableWrapperHook(
+      std::make_shared<imperative::CppVariableWrapperHook>(DoubleHook));
+
+  // add Void hook
+  int64_t hook_value = 0;
+  x->GradVarBase()->AddVoidHook(
+      std::make_shared<std::function<void()>>([&]() { hook_value = 100; }));
 
   // 2. forward
   var_pair x_pair = var_pair("X", vb_vector(1, x));
@@ -199,12 +222,15 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() {
   engine.Init(tensors, grad_tensors);
   engine.Execute();
 
+  // verify VariableWrapper hook result
   framework::LoDTensor x_grad;
   framework::TensorCopySync(x->GradVar().Get<framework::LoDTensor>(), place,
                             &x_grad);
   for (int i = 0; i < x_grad.numel(); ++i) {
     ASSERT_EQ(x_grad.data<float>()[i], 16.0);
   }
+  // verify Void hook result
+  ASSERT_EQ(hook_value, 100);
 
   framework::LoDTensor y_grad;
   framework::TensorCopySync(y->GradVar().Get<framework::LoDTensor>(), place,
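
Both tests assert the doubled leaf gradient plus the void-hook side effect. Stripped of Paddle types, the logic behind the first assertion reduces to the sketch below; the 4.0 input is a hypothetical accumulated gradient chosen so that doubling yields the asserted 8.0:

#include <cassert>
#include <cstdint>
#include <vector>

std::vector<float> DoubleHook(const std::vector<float>& grad) {
  std::vector<float> out(grad.size());
  for (size_t i = 0; i < grad.size(); ++i) out[i] = grad[i] * 2.0f;
  return out;
}

int main() {
  std::vector<float> x_grad(4, 4.0f);  // hypothetical accumulated gradient
  int64_t hook_value = 0;
  x_grad = DoubleHook(x_grad);  // VariableWrapper hook, runs once
  hook_value = 10;              // void hook side effect
  for (float g : x_grad) assert(g == 8.0f);  // mirrors ASSERT_EQ(..., 8.0)
  assert(hook_value == 10);
}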

paddle/fluid/imperative/variable_wrapper.h

Lines changed: 26 additions & 21 deletions
@@ -220,35 +220,35 @@ class VariableWrapper {
   }
 
   /* Hook related methods */
-  bool HasHook() const { return !hooks_.empty(); }
+  bool HasVariableWrapperHook() const { return !var_hooks_.empty(); }
 
-  bool HasMutableHook() const { return !mutable_hooks_.empty(); }
-
-  int64_t AddHook(std::shared_ptr<VariableWrapperHook>&& hook) {
-    hooks_.emplace(next_hook_id_, std::move(hook));
+  int64_t AddVariableWrapperHook(std::shared_ptr<VariableWrapperHook>&& hook) {
+    var_hooks_.emplace(next_hook_id_, std::move(hook));
     return next_hook_id_++;
   }
 
-  bool RemoveHook(const int64_t& hook_id) {
-    auto remove_cnt = hooks_.erase(hook_id);
+  bool RemoveVariableWrapperHook(const int64_t& hook_id) {
+    auto remove_cnt = var_hooks_.erase(hook_id);
     if (remove_cnt == 0) {
       return false;
     }
     return true;
   }
 
-  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>& GetHooks()
-      const {
-    return hooks_;
+  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>&
+  GetVariableWrapperHooks() const {
+    return var_hooks_;
   }
 
-  void AddMutableHook(std::shared_ptr<InplaceVariableWrapperHook>&& hook) {
-    mutable_hooks_.emplace_back(std::move(hook));
+  bool HasVoidHook() const { return !void_hooks_.empty(); }
+
+  void AddVoidHook(std::shared_ptr<std::function<void()>>&& hook) {
+    void_hooks_.emplace_back(std::move(hook));
   }
 
-  const std::vector<std::shared_ptr<InplaceVariableWrapperHook>>&
-  GetMutableHooks() const {
-    return mutable_hooks_;
+  const std::vector<std::shared_ptr<std::function<void()>>>& GetVoidHooks()
+      const {
+    return void_hooks_;
   }
 
  private:
@@ -319,14 +319,19 @@ class VariableWrapper {
   // isn't need
   bool is_empty_{false};
 
-  // NOTE(chenweihang): only grad var can hold hooks now
+  // NOTE(chenweihang): only grad var will hold hooks now
   int64_t next_hook_id_{0};
-  // Hooks used to register hook for grad var, support adding and removing,
+  // [ Hooks with VariableWrapper as input and output ]
+  // NOTE: Now registered for grad var, support adding and removing,
   // key is the accumulated int64_t value
-  std::map<int64_t, std::shared_ptr<VariableWrapperHook>> hooks_;
-  // Hooks executed after the execution of the entire backward process is over,
-  // currently only supported for reducing in distributed training
-  std::vector<std::shared_ptr<InplaceVariableWrapperHook>> mutable_hooks_;
+  // NOTE: Var hook need to support removing, so need hook id
+  std::map<int64_t, std::shared_ptr<VariableWrapperHook>> var_hooks_;
+  // [ Hooks without input and output ]
+  // NOTE: Now registered after the execution of the entire backward
+  // process is over, currently only used for reducing in distributed
+  // training
+  // NOTE: Now no need to support remove void hook
+  std::vector<std::shared_ptr<std::function<void()>>> void_hooks_;
 };
 
 }  // namespace imperative
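
The two containers encode the lifecycle difference: removable hooks need stable ids, so they live in a std::map keyed by a monotonically increasing counter, while void hooks are append-only and a std::vector suffices. A compact standalone model of the id scheme:

#include <cstdint>
#include <functional>
#include <map>

struct HookRegistry {  // simplified model of the members above
  int64_t next_hook_id{0};
  std::map<int64_t, std::function<void()>> var_hooks;

  int64_t Add(std::function<void()> hook) {
    var_hooks.emplace(next_hook_id, std::move(hook));
    return next_hook_id++;  // ids never reused, so removal is unambiguous
  }
  bool Remove(int64_t hook_id) { return var_hooks.erase(hook_id) > 0; }
};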
