
Commit bc85ae4

add SetGradVarBase method to set the gradient after conversion
Parent: dcac88e

File tree: 4 files changed (+66 −22 lines)

paddle/fluid/imperative/layer.h

Lines changed: 4 additions & 0 deletions
@@ -108,6 +108,10 @@ class VarBase {
 
   void ClearGradVarBase() { grad_var_ = nullptr; }
 
+  void SetGradVarBase(VarBase& grad_var) {
+    MutableGradVarBase()->CopyFrom(grad_var, true);
+  }
+
   const std::shared_ptr<VarBase>& MutableGradVarBase() {
     if (grad_var_ == nullptr) {
       if (auto grad_var_wrapper = var_->GetGradVar()) {

paddle/fluid/pybind/imperative.cc

Lines changed: 26 additions & 16 deletions
@@ -1033,6 +1033,10 @@ void BindImperative(py::module *m_ptr) {
              return std::shared_ptr<imperative::VarBase>(nullptr);
            },
            py::return_value_policy::copy)
+      .def("_set_grad_ivar",
+           [](imperative::VarBase &self, imperative::VarBase &grad) {
+             self.SetGradVarBase(grad);
+           })
       .def("_is_sparse",
            [](imperative::VarBase &self) {
              return self.Var().IsType<framework::SelectedRows>();
@@ -1298,22 +1302,28 @@ void BindImperative(py::module *m_ptr) {
                     &imperative::VarBase::SetOverridedStopGradient)
       .def_property("persistable", &imperative::VarBase::Persistable,
                     &imperative::VarBase::SetPersistable)
-      .def_property_readonly(
-          "shape",
-          [](imperative::VarBase &self) {
-            if (self.Var().IsType<framework::LoDTensor>()) {
-              return framework::vectorize<int>(
-                  self.Var().Get<framework::LoDTensor>().dims());
-            } else if (self.Var().IsType<framework::SelectedRows>()) {
-              return framework::vectorize<int>(
-                  self.Var().Get<framework::SelectedRows>().value().dims());
-            } else {
-              VLOG(2) << "It is meaningless to get shape of "
-                         "variable type "
-                      << GetTypeName(self);
-              return std::vector<int>();
-            }
-          })
+      .def_property_readonly("shape",
+                             [](imperative::VarBase &self) {
+                               if (self.Var().IsType<framework::LoDTensor>()) {
+                                 return framework::vectorize<int>(
+                                     self.Var()
+                                         .Get<framework::LoDTensor>()
+                                         .dims());
+                               } else if (self.Var()
+                                              .IsType<
+                                                  framework::SelectedRows>()) {
+                                 return framework::vectorize<int>(
+                                     self.Var()
+                                         .Get<framework::SelectedRows>()
+                                         .value()
+                                         .dims());
+                               } else {
+                                 VLOG(2) << "It is meaningless to get shape of "
+                                            "variable type "
+                                         << GetTypeName(self);
+                                 return std::vector<int>();
+                               }
+                             })
       .def_property_readonly("is_leaf", &imperative::VarBase::IsLeaf,
                              R"DOC(
       Whether a Tensor is leaf Tensor.
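
With this binding, the C++ SetGradVarBase becomes reachable from Python as the private VarBase method `_set_grad_ivar`, which copies a whole tensor into a parameter's gradient slot. A minimal sketch of the intended call, assuming a Paddle dygraph build that includes this commit (values and output are indicative only):

import numpy as np
import paddle

paddle.disable_static()
linear = paddle.nn.Linear(2, 2)
new_grad = np.random.random([2, 2]).astype('float32')
# _set_grad_ivar copies the given tensor into the weight's gradient VarBase,
# creating the gradient variable first if it does not exist yet.
linear.weight._set_grad_ivar(paddle.to_tensor(new_grad))
print(np.allclose(linear.weight.grad, new_grad))  # expected: True

This mirrors what the new test in test_base_layer.py does in setUp before exercising Layer.to().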

python/paddle/fluid/dygraph/layers.py

Lines changed: 7 additions & 6 deletions
@@ -1358,11 +1358,12 @@ def _apply(self, func, place, dtype, blocking):
 
                 if param.grad is not None:
                     with no_grad():
-                        grad_applied = func(param.grad, place, dtype, blocking)
+                        grad_applied = func(param._grad_ivar(), place, dtype,
+                                            blocking)
 
-                        assert param.grad.is_leaf
-                        grad_applied.stop_gradient = param.grad.stop_gradient
-                        self._parameters[key].grad = grad_applied
+                        grad_applied.stop_gradient = param._grad_ivar(
+                        ).stop_gradient
+                        self._parameters[key]._set_grad_ivar(grad_applied)
 
         for key, buf in self._buffers.items():
             self._buffers[key] = func(buf, place, dtype, blocking)
@@ -1376,7 +1377,7 @@ def to(self, place=None, dtype=None, blocking=None):
                 If None, the place is the same with the origin Tensor. If place is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
                 index of the GPUs or XPUs. Default: None.
 
-            dtype(str|core.VarDesc.VarType|None, optional): the type of the data. If None, the dtype is the same with the origin Tensor. Default: None.
+            dtype(str|core.VarDesc.VarType|None, optional): The type of the data. If None, the dtype is the same with the origin Tensor. Default: None.
 
             blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
                 asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
@@ -1402,7 +1403,7 @@ def to(self, place=None, dtype=None, blocking=None):
                 #        [[-0.32770029,  0.38653070],
                 #         [ 0.46030545,  0.08158520]])
 
-                linear.to(place=paddle.CPUPlace())
+                linear.to(place='cpu')
                 linear.weight
                 #Tensor(shape=[2, 2], dtype=float64, place=CPUPlace, stop_gradient=False,
                 #       [[-0.32770029,  0.38653070],
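
The `_apply` change means `Layer.to()` now runs the conversion function on `param._grad_ivar()` and writes the result back through `_set_grad_ivar`, so an existing gradient follows the parameter to its new dtype and place instead of being left behind. A rough sketch of the observable effect, assuming a build containing this commit (the printed dtypes are the expected outcome, not captured output):

import paddle

paddle.disable_static()
linear = paddle.nn.Linear(2, 2)
loss = paddle.sum(linear(paddle.rand([4, 2])))
loss.backward()  # populate linear.weight's gradient

linear.to(dtype='double')
# Both the parameter and its gradient should now be FP64.
print(linear.weight.dtype)
print(linear.weight._grad_ivar().dtype)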

python/paddle/fluid/tests/unittests/test_base_layer.py

Lines changed: 29 additions & 0 deletions
@@ -335,39 +335,68 @@ class TestLayerTo(unittest.TestCase):
     def setUp(self):
         paddle.disable_static()
         self.linear = paddle.nn.Linear(2, 2)
+        self.new_grad = np.random.random([2, 2])
+        self.linear.weight._set_grad_ivar(paddle.to_tensor(self.new_grad))
         buffer = paddle.to_tensor([0.0], dtype='float32')
         self.linear.register_buffer("buf_name", buffer, persistable=True)
 
+        sublayer = paddle.nn.Conv1D(3, 2, 3)
+        self.linear.add_sublayer(1, sublayer)
+
     def test_to_api(self):
         self.linear.to(dtype='double')
         self.assertEqual(self.linear.weight.dtype,
                          paddle.fluid.core.VarDesc.VarType.FP64)
         self.assertEqual(self.linear.buf_name.dtype,
                          paddle.fluid.core.VarDesc.VarType.FP64)
+        self.assertTrue(np.allclose(self.linear.weight.grad, self.new_grad))
+        self.assertTrue(self.linear.weight._grad_ivar().dtype,
+                        paddle.fluid.core.VarDesc.VarType.FP64)
+
+        self.linear.to()
+        self.assertEqual(self.linear.weight.dtype,
+                         paddle.fluid.core.VarDesc.VarType.FP64)
+        self.assertEqual(self.linear.buf_name.dtype,
+                         paddle.fluid.core.VarDesc.VarType.FP64)
+        self.assertTrue(np.allclose(self.linear.weight.grad, self.new_grad))
+        self.assertTrue(self.linear.weight._grad_ivar().dtype,
+                        paddle.fluid.core.VarDesc.VarType.FP64)
 
         if paddle.fluid.is_compiled_with_cuda():
             self.linear.to(place=paddle.CUDAPlace(0))
             self.assertTrue(self.linear.weight.place.is_gpu_place())
             self.assertEqual(self.linear.weight.place.gpu_device_id(), 0)
             self.assertTrue(self.linear.buf_name.place.is_gpu_place())
             self.assertEqual(self.linear.buf_name.place.gpu_device_id(), 0)
+            self.assertTrue(self.linear.weight._grad_ivar().place.is_gpu_place(
+            ))
+            self.assertEqual(
+                self.linear.weight._grad_ivar().place.gpu_device_id(), 0)
 
             self.linear.to(place='gpu:0')
             self.assertTrue(self.linear.weight.place.is_gpu_place())
             self.assertEqual(self.linear.weight.place.gpu_device_id(), 0)
             self.assertTrue(self.linear.buf_name.place.is_gpu_place())
             self.assertEqual(self.linear.buf_name.place.gpu_device_id(), 0)
+            self.assertTrue(self.linear.weight._grad_ivar().place.is_gpu_place(
+            ))
+            self.assertEqual(
+                self.linear.weight._grad_ivar().place.gpu_device_id(), 0)
 
         self.linear.to(place=paddle.CPUPlace())
         self.assertTrue(self.linear.weight.place.is_cpu_place())
         self.assertTrue(self.linear.buf_name.place.is_cpu_place())
+        self.assertTrue(self.linear.weight._grad_ivar().place.is_cpu_place())
 
         self.linear.to(place='cpu')
         self.assertTrue(self.linear.weight.place.is_cpu_place())
         self.assertTrue(self.linear.buf_name.place.is_cpu_place())
+        self.assertTrue(self.linear.weight._grad_ivar().place.is_cpu_place())
 
         self.assertRaises(ValueError, self.linear.to, place=1)
 
+        self.assertRaises(AssertionError, self.linear.to, blocking=1)
+
 
 if __name__ == '__main__':
     unittest.main()
