
Commit bc85ae4

add SetGradVarBase method to set the gradient after conversion
Parent: dcac88e

File tree: 4 files changed (+66 −22 lines)

paddle/fluid/imperative/layer.h

Lines changed: 4 additions & 0 deletions
@@ -108,6 +108,10 @@ class VarBase {
 
   void ClearGradVarBase() { grad_var_ = nullptr; }
 
+  void SetGradVarBase(VarBase& grad_var) {
+    MutableGradVarBase()->CopyFrom(grad_var, true);
+  }
+
   const std::shared_ptr<VarBase>& MutableGradVarBase() {
     if (grad_var_ == nullptr) {
       if (auto grad_var_wrapper = var_->GetGradVar()) {

paddle/fluid/pybind/imperative.cc

Lines changed: 26 additions & 16 deletions
@@ -1033,6 +1033,10 @@ void BindImperative(py::module *m_ptr) {
              return std::shared_ptr<imperative::VarBase>(nullptr);
            },
            py::return_value_policy::copy)
+      .def("_set_grad_ivar",
+           [](imperative::VarBase &self, imperative::VarBase &grad) {
+             self.SetGradVarBase(grad);
+           })
       .def("_is_sparse",
            [](imperative::VarBase &self) {
              return self.Var().IsType<framework::SelectedRows>();
@@ -1298,22 +1302,28 @@ void BindImperative(py::module *m_ptr) {
                     &imperative::VarBase::SetOverridedStopGradient)
       .def_property("persistable", &imperative::VarBase::Persistable,
                     &imperative::VarBase::SetPersistable)
-      .def_property_readonly(
-          "shape",
-          [](imperative::VarBase &self) {
-            if (self.Var().IsType<framework::LoDTensor>()) {
-              return framework::vectorize<int>(
-                  self.Var().Get<framework::LoDTensor>().dims());
-            } else if (self.Var().IsType<framework::SelectedRows>()) {
-              return framework::vectorize<int>(
-                  self.Var().Get<framework::SelectedRows>().value().dims());
-            } else {
-              VLOG(2) << "It is meaningless to get shape of "
-                         "variable type "
-                      << GetTypeName(self);
-              return std::vector<int>();
-            }
-          })
+      .def_property_readonly("shape",
+                             [](imperative::VarBase &self) {
+                               if (self.Var().IsType<framework::LoDTensor>()) {
+                                 return framework::vectorize<int>(
+                                     self.Var()
+                                         .Get<framework::LoDTensor>()
+                                         .dims());
+                               } else if (self.Var()
+                                              .IsType<
+                                                  framework::SelectedRows>()) {
+                                 return framework::vectorize<int>(
+                                     self.Var()
+                                         .Get<framework::SelectedRows>()
+                                         .value()
+                                         .dims());
+                               } else {
+                                 VLOG(2) << "It is meaningless to get shape of "
+                                            "variable type "
+                                         << GetTypeName(self);
+                                 return std::vector<int>();
+                               }
+                             })
       .def_property_readonly("is_leaf", &imperative::VarBase::IsLeaf,
                              R"DOC(
       Whether a Tensor is leaf Tensor.
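
With this binding, the C++ SetGradVarBase becomes reachable from Python as the private VarBase method `_set_grad_ivar`, which copies a whole tensor into a parameter's gradient slot. A minimal sketch of the intended call, assuming a Paddle dygraph build that includes this commit (values and output are indicative only):

import numpy as np
import paddle

paddle.disable_static()
linear = paddle.nn.Linear(2, 2)
new_grad = np.random.random([2, 2]).astype('float32')
# _set_grad_ivar copies the given tensor into the weight's gradient VarBase,
# creating the gradient variable first if it does not exist yet.
linear.weight._set_grad_ivar(paddle.to_tensor(new_grad))
print(np.allclose(linear.weight.grad, new_grad))  # expected: True

This mirrors what the new test in test_base_layer.py does in setUp before exercising Layer.to().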

python/paddle/fluid/dygraph/layers.py

Lines changed: 7 additions & 6 deletions
@@ -1358,11 +1358,12 @@ def _apply(self, func, place, dtype, blocking):
 
                 if param.grad is not None:
                     with no_grad():
-                        grad_applied = func(param.grad, place, dtype, blocking)
+                        grad_applied = func(param._grad_ivar(), place, dtype,
+                                            blocking)
 
-                        assert param.grad.is_leaf
-                        grad_applied.stop_gradient = param.grad.stop_gradient
-                        self._parameters[key].grad = grad_applied
+                        grad_applied.stop_gradient = param._grad_ivar(
+                        ).stop_gradient
+                        self._parameters[key]._set_grad_ivar(grad_applied)
 
         for key, buf in self._buffers.items():
             self._buffers[key] = func(buf, place, dtype, blocking)
@@ -1376,7 +1377,7 @@ def to(self, place=None, dtype=None, blocking=None):
                 If None, the place is the same with the origin Tensor. If place is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
                 index of the GPUs or XPUs. Default: None.
 
-            dtype(str|core.VarDesc.VarType|None, optional): the type of the data. If None, the dtype is the same with the origin Tensor. Default: None.
+            dtype(str|core.VarDesc.VarType|None, optional): The type of the data. If None, the dtype is the same with the origin Tensor. Default: None.
 
             blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
                 asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
@@ -1402,7 +1403,7 @@ def to(self, place=None, dtype=None, blocking=None):
                 #        [[-0.32770029,  0.38653070],
                 #         [ 0.46030545,  0.08158520]])
 
-                linear.to(place=paddle.CPUPlace())
+                linear.to(place='cpu')
                 linear.weight
                 #Tensor(shape=[2, 2], dtype=float64, place=CPUPlace, stop_gradient=False,
                 #       [[-0.32770029,  0.38653070],
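
The `_apply` change means `Layer.to()` now runs the conversion function on `param._grad_ivar()` and writes the result back through `_set_grad_ivar`, so an existing gradient follows the parameter to its new dtype and place instead of being left behind. A rough sketch of the observable effect, assuming a build containing this commit (the printed dtypes are the expected outcome, not captured output):

import paddle

paddle.disable_static()
linear = paddle.nn.Linear(2, 2)
loss = paddle.sum(linear(paddle.rand([4, 2])))
loss.backward()  # populate linear.weight's gradient

linear.to(dtype='double')
# Both the parameter and its gradient should now be FP64.
print(linear.weight.dtype)
print(linear.weight._grad_ivar().dtype)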

python/paddle/fluid/tests/unittests/test_base_layer.py

Lines changed: 29 additions & 0 deletions
@@ -335,39 +335,68 @@ class TestLayerTo(unittest.TestCase):
     def setUp(self):
         paddle.disable_static()
         self.linear = paddle.nn.Linear(2, 2)
+        self.new_grad = np.random.random([2, 2])
+        self.linear.weight._set_grad_ivar(paddle.to_tensor(self.new_grad))
         buffer = paddle.to_tensor([0.0], dtype='float32')
         self.linear.register_buffer("buf_name", buffer, persistable=True)
 
+        sublayer = paddle.nn.Conv1D(3, 2, 3)
+        self.linear.add_sublayer(1, sublayer)
+
     def test_to_api(self):
         self.linear.to(dtype='double')
         self.assertEqual(self.linear.weight.dtype,
                          paddle.fluid.core.VarDesc.VarType.FP64)
         self.assertEqual(self.linear.buf_name.dtype,
                          paddle.fluid.core.VarDesc.VarType.FP64)
+        self.assertTrue(np.allclose(self.linear.weight.grad, self.new_grad))
+        self.assertTrue(self.linear.weight._grad_ivar().dtype,
+                        paddle.fluid.core.VarDesc.VarType.FP64)
+
+        self.linear.to()
+        self.assertEqual(self.linear.weight.dtype,
+                         paddle.fluid.core.VarDesc.VarType.FP64)
+        self.assertEqual(self.linear.buf_name.dtype,
+                         paddle.fluid.core.VarDesc.VarType.FP64)
+        self.assertTrue(np.allclose(self.linear.weight.grad, self.new_grad))
+        self.assertTrue(self.linear.weight._grad_ivar().dtype,
+                        paddle.fluid.core.VarDesc.VarType.FP64)
 
         if paddle.fluid.is_compiled_with_cuda():
             self.linear.to(place=paddle.CUDAPlace(0))
             self.assertTrue(self.linear.weight.place.is_gpu_place())
             self.assertEqual(self.linear.weight.place.gpu_device_id(), 0)
             self.assertTrue(self.linear.buf_name.place.is_gpu_place())
             self.assertEqual(self.linear.buf_name.place.gpu_device_id(), 0)
+            self.assertTrue(self.linear.weight._grad_ivar().place.is_gpu_place(
+            ))
+            self.assertEqual(
+                self.linear.weight._grad_ivar().place.gpu_device_id(), 0)
 
             self.linear.to(place='gpu:0')
             self.assertTrue(self.linear.weight.place.is_gpu_place())
             self.assertEqual(self.linear.weight.place.gpu_device_id(), 0)
             self.assertTrue(self.linear.buf_name.place.is_gpu_place())
             self.assertEqual(self.linear.buf_name.place.gpu_device_id(), 0)
+            self.assertTrue(self.linear.weight._grad_ivar().place.is_gpu_place(
+            ))
+            self.assertEqual(
+                self.linear.weight._grad_ivar().place.gpu_device_id(), 0)
 
         self.linear.to(place=paddle.CPUPlace())
         self.assertTrue(self.linear.weight.place.is_cpu_place())
         self.assertTrue(self.linear.buf_name.place.is_cpu_place())
+        self.assertTrue(self.linear.weight._grad_ivar().place.is_cpu_place())
 
         self.linear.to(place='cpu')
         self.assertTrue(self.linear.weight.place.is_cpu_place())
         self.assertTrue(self.linear.buf_name.place.is_cpu_place())
+        self.assertTrue(self.linear.weight._grad_ivar().place.is_cpu_place())
 
         self.assertRaises(ValueError, self.linear.to, place=1)
 
+        self.assertRaises(AssertionError, self.linear.to, blocking=1)
+
 
 if __name__ == '__main__':
     unittest.main()
