From d0915f8f379ea4d76dcbed2aa9cdc529151494d1 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Thu, 11 Mar 2021 01:58:33 +0000 Subject: [PATCH 01/22] add custom init grad for backward function --- paddle/fluid/imperative/basic_engine.cc | 14 +++-- paddle/fluid/imperative/basic_engine.h | 3 +- paddle/fluid/pybind/imperative.cc | 4 +- .../fluid/dygraph/varbase_patch_methods.py | 24 ++++++++- .../tests/unittests/test_custom_grad_input.py | 53 +++++++++++++++++++ 5 files changed, 89 insertions(+), 9 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_custom_grad_input.py diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index 29ba54986801f1..f2613df33cd2cf 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -36,7 +36,7 @@ DECLARE_bool(sort_sum_gradient); namespace paddle { namespace imperative { -void BasicEngine::Init(VarBase* var, bool retain_graph) { +void BasicEngine::Init(VarBase* var, bool retain_graph, VarBase* grad_tensor) { retain_graph_ = retain_graph; init_node_ = var->GradVarBase()->GradNode(); PADDLE_ENFORCE_EQ(var->GradVarBase()->GraphIsFreed(), false, @@ -75,9 +75,15 @@ void BasicEngine::Init(VarBase* var, bool retain_graph) { << " as stop_gradient false"; var->GradVarBase()->InnerSetOverridedStopGradient(false); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place()); - grad_var->Resize(fwd_var.dims()); - grad_var->mutable_data(fwd_var.place(), fwd_var.type()); - operators::math::set_constant(*dev_ctx, grad_var, 1.0); + if (grad_tensor == nullptr) { + grad_var->Resize(fwd_var.dims()); + grad_var->mutable_data(fwd_var.place(), fwd_var.type()); + operators::math::set_constant(*dev_ctx, grad_var, 1.0); + } else { + paddle::framework::TensorCopy( + grad_tensor->Var().Get(), fwd_var.place(), + *dev_ctx, grad_var); + } } void BasicEngine::CheckBackwardInputs(const OpBase& op) { diff --git a/paddle/fluid/imperative/basic_engine.h b/paddle/fluid/imperative/basic_engine.h index a2ad8b5f8aa61e..6a188b073b2d85 100644 --- a/paddle/fluid/imperative/basic_engine.h +++ b/paddle/fluid/imperative/basic_engine.h @@ -30,7 +30,8 @@ class OpBase; class BasicEngine : public Engine { public: - void Init(VarBase* var, bool retain_graph = false); + void Init(VarBase* var, bool retain_graph = false, + VarBase* grad_tensor = nullptr); void Execute() override; diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 58ef177863093d..48f1954a5bc52f 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -920,11 +920,11 @@ void BindImperative(py::module *m_ptr) { )DOC") .def("_run_backward", [](imperative::VarBase &self, const imperative::Tracer &tracer, - bool retain_graph) { + bool retain_graph, imperative::VarBase &grad_tensor) { // TODO(jiabin): when we impl more backward execution we can // select them auto *engine = tracer.GetEngine(); - engine->Init(&self, retain_graph); + engine->Init(&self, retain_graph, &grad_tensor); VLOG(3) << "Start backward"; engine->Execute(); VLOG(3) << "Finish backward"; diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index ac0944c5718908..a065a3c2e47846 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -133,7 +133,7 @@ def set_value(self, value): framework._current_expected_place()) @framework.dygraph_only - def 
backward(self, retain_graph=False): + def backward(self, retain_graph=False, grad_tensor=None): """ Run backward of current Graph which starts from current Tensor. @@ -147,6 +147,10 @@ def backward(self, retain_graph=False): :code:`retain_graph` to True, then the grads will be retained. Thus, seting it to False is much more memory-efficient. Defaults to False. + grad_tensor(Tensor, optional): initial gradient values of `outputs` . If `grad_tensor` is None, + the initial gradient values of `outputs` would be Tensor filled with 1; + if `grad_tensor` is not None, it must have the same length as `outputs`. + Default None. Returns: NoneType: None @@ -168,6 +172,17 @@ def backward(self, retain_graph=False): print("{}".format(x.grad)) # 0. + grad_tensor=paddle.to_tensor(2.) + for i in range(5): + y = paddle.pow(x, 4.0) + y.backward(grad_tensor=grad_tensor) + print("{}: {}".format(i, x.grad)) + # 0: [1000.] + # 1: [2000.] + # 2: [3000.] + # 3: [4000.] + # 4: [5000.] + """ if framework.in_dygraph_mode(): if paddle.is_compiled_with_xpu(): @@ -176,7 +191,12 @@ def backward(self, retain_graph=False): scaled_loss._run_backward(framework._dygraph_tracer(), retain_graph) else: - self._run_backward(framework._dygraph_tracer(), retain_graph) + if grad_tensor is not None: + assert grad_tensor.shape == self.shape, "Variable Shape not match, Variable of grad_tensor [ {} ] with shape {} mismatch Variable [ {} ] with shape {}".format( + grad_tensor.name, grad_tensor.shape, self.name, + self.shape) + self._run_backward(framework._dygraph_tracer(), retain_graph, + grad_tensor) else: raise ValueError( "Variable.backward() is only available in DyGraph mode") diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py new file mode 100644 index 00000000000000..c545565d86c944 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py @@ -0,0 +1,53 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np + +import paddle +import paddle.fluid.dygraph as dg +from op_test import OpTest + + +class TestBackward(unittest.TestCase): + def setUp(self): + self._dtypes = ["float32", "float64"] + self._places = [paddle.CPUPlace()] + if paddle.is_compiled_with_cuda(): + self._places.append(paddle.CUDAPlace(0)) + + def test_all_positive(self): + for dtype in self._dtypes: + x = np.random.random([2, 100]).astype(dtype) + y = np.random.random([100, 2]).astype(dtype) + z = np.matmul(x, y) + grad = np.random.random(z.shape).astype(dtype) + for place in self._places: + with dg.guard(place): + x_tensor = paddle.to_tensor(x, stop_gradient=False) + y_tensor = paddle.to_tensor(y) + z_tensor = paddle.matmul(x_tensor, y_tensor) + + grad_tensor = paddle.to_tensor(grad) + z_tensor.backward(grad_tensor=grad_tensor) + + x_grad = np.matmul(grad, y.T) + + self.assertTrue(np.allclose(x_grad, x_tensor.grad)) + + +if __name__ == '__main__': + unittest.main() From 0bccce663638df4131fc11e0d334916c0efb548b Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Thu, 11 Mar 2021 06:56:57 +0000 Subject: [PATCH 02/22] add custom init grad for backward function --- paddle/fluid/imperative/basic_engine.cc | 11 ++--------- python/paddle/fluid/dygraph/varbase_patch_methods.py | 9 +++++++-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index f2613df33cd2cf..bc03e178588791 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -75,15 +75,8 @@ void BasicEngine::Init(VarBase* var, bool retain_graph, VarBase* grad_tensor) { << " as stop_gradient false"; var->GradVarBase()->InnerSetOverridedStopGradient(false); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place()); - if (grad_tensor == nullptr) { - grad_var->Resize(fwd_var.dims()); - grad_var->mutable_data(fwd_var.place(), fwd_var.type()); - operators::math::set_constant(*dev_ctx, grad_var, 1.0); - } else { - paddle::framework::TensorCopy( - grad_tensor->Var().Get(), fwd_var.place(), - *dev_ctx, grad_var); - } + paddle::framework::TensorCopy(grad_tensor->Var().Get(), + fwd_var.place(), *dev_ctx, grad_var); } void BasicEngine::CheckBackwardInputs(const OpBase& op) { diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index a065a3c2e47846..bf89fea31b6b5c 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -191,8 +191,13 @@ def backward(self, retain_graph=False, grad_tensor=None): scaled_loss._run_backward(framework._dygraph_tracer(), retain_graph) else: - if grad_tensor is not None: - assert grad_tensor.shape == self.shape, "Variable Shape not match, Variable of grad_tensor [ {} ] with shape {} mismatch Variable [ {} ] with shape {}".format( + if grad_tensor is None: + grad_tensor = paddle.ones_like(self) + else: + assert isinstance( + grad_tensor, core.VarBase + ), "The type of grad_tensot must be paddle.VarBase" + assert grad_tensor.shape == self.shape, "Variable shape not match, Variable of grad_tensor [ {} ] with shape {} mismatch Variable [ {} ] with shape {}".format( grad_tensor.name, grad_tensor.shape, self.name, self.shape) self._run_backward(framework._dygraph_tracer(), retain_graph, From 5dac8e9568693d985d3d4bbee807a098db00fe1e Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Fri, 12 Mar 2021 
07:29:09 +0000 Subject: [PATCH 03/22] handle when the grad_tensor is none --- paddle/fluid/imperative/basic_engine.cc | 11 +++++++++-- paddle/fluid/pybind/imperative.cc | 6 ++++-- python/paddle/fluid/dygraph/varbase_patch_methods.py | 4 +--- .../fluid/tests/unittests/test_imperative_basic.py | 1 + 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index bc03e178588791..f2613df33cd2cf 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -75,8 +75,15 @@ void BasicEngine::Init(VarBase* var, bool retain_graph, VarBase* grad_tensor) { << " as stop_gradient false"; var->GradVarBase()->InnerSetOverridedStopGradient(false); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place()); - paddle::framework::TensorCopy(grad_tensor->Var().Get(), - fwd_var.place(), *dev_ctx, grad_var); + if (grad_tensor == nullptr) { + grad_var->Resize(fwd_var.dims()); + grad_var->mutable_data(fwd_var.place(), fwd_var.type()); + operators::math::set_constant(*dev_ctx, grad_var, 1.0); + } else { + paddle::framework::TensorCopy( + grad_tensor->Var().Get(), fwd_var.place(), + *dev_ctx, grad_var); + } } void BasicEngine::CheckBackwardInputs(const OpBase& op) { diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 48f1954a5bc52f..7cd8dcd4004f0b 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -920,15 +920,17 @@ void BindImperative(py::module *m_ptr) { )DOC") .def("_run_backward", [](imperative::VarBase &self, const imperative::Tracer &tracer, - bool retain_graph, imperative::VarBase &grad_tensor) { + bool retain_graph, imperative::VarBase *grad_tensor) { // TODO(jiabin): when we impl more backward execution we can // select them auto *engine = tracer.GetEngine(); - engine->Init(&self, retain_graph, &grad_tensor); + engine->Init(&self, retain_graph, grad_tensor); VLOG(3) << "Start backward"; engine->Execute(); VLOG(3) << "Finish backward"; }, + py::arg("tracer"), py::arg("retain_graph"), + py::arg("grad_tensor") = static_cast(nullptr), py::call_guard()) .def("_grad_name", &imperative::VarBase::GradVarName) .def("_grad_value", diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index bf89fea31b6b5c..814a4bccad2212 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -191,9 +191,7 @@ def backward(self, retain_graph=False, grad_tensor=None): scaled_loss._run_backward(framework._dygraph_tracer(), retain_graph) else: - if grad_tensor is None: - grad_tensor = paddle.ones_like(self) - else: + if grad_tensor is not None: assert isinstance( grad_tensor, core.VarBase ), "The type of grad_tensot must be paddle.VarBase" diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index cb48013902a532..b1f231b1051c22 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -807,4 +807,5 @@ def test_without_guard(self): if __name__ == '__main__': paddle.enable_static() + #paddle.set_device("cpu") unittest.main() From ef4c7b90af5a2cd8ad0982e1a5bd7fa3a67b484b Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Fri, 12 Mar 2021 07:41:13 +0000 Subject: [PATCH 04/22] handle when the grad_tensor is none --- 
python/paddle/fluid/tests/unittests/test_imperative_basic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index b1f231b1051c22..cb48013902a532 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -807,5 +807,4 @@ def test_without_guard(self): if __name__ == '__main__': paddle.enable_static() - #paddle.set_device("cpu") unittest.main() From 33b041687c401069b61561dc2e83ea670f13dec1 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Mon, 15 Mar 2021 07:37:36 +0000 Subject: [PATCH 05/22] fix the args type error on windows platform --- paddle/fluid/pybind/imperative.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 7cd8dcd4004f0b..c9952c01bcec4b 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -920,17 +920,15 @@ void BindImperative(py::module *m_ptr) { )DOC") .def("_run_backward", [](imperative::VarBase &self, const imperative::Tracer &tracer, - bool retain_graph, imperative::VarBase *grad_tensor) { + bool retain_graph, std::shared_ptr &grad_tensor) { // TODO(jiabin): when we impl more backward execution we can // select them auto *engine = tracer.GetEngine(); - engine->Init(&self, retain_graph, grad_tensor); + engine->Init(&self, retain_graph, grad_tensor.get()); VLOG(3) << "Start backward"; engine->Execute(); VLOG(3) << "Finish backward"; }, - py::arg("tracer"), py::arg("retain_graph"), - py::arg("grad_tensor") = static_cast(nullptr), py::call_guard()) .def("_grad_name", &imperative::VarBase::GradVarName) .def("_grad_value", From 837e26be9a4c6d5f9f379b806df26d93fe0b4a41 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Mon, 15 Mar 2021 08:08:55 +0000 Subject: [PATCH 06/22] modify the args order and doc --- python/paddle/fluid/dygraph/varbase_patch_methods.py | 12 ++++++------ .../fluid/tests/unittests/test_custom_grad_input.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 814a4bccad2212..bbb001a05e2e4a 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -133,7 +133,7 @@ def set_value(self, value): framework._current_expected_place()) @framework.dygraph_only - def backward(self, retain_graph=False, grad_tensor=None): + def backward(self, grad_tensor=None, retain_graph=False): """ Run backward of current Graph which starts from current Tensor. @@ -142,15 +142,15 @@ def backward(self, retain_graph=False, grad_tensor=None): You can clear gradient by ``Tensor.clear_grad()`` . Args: + grad_tensor(Tensor, optional): initial gradient values of the current Tensor. If `grad_tensor` is None, + the initial gradient values of the current Tensor would be Tensor filled with 1.0; + if `grad_tensor` is not None, it must have the same length as the current Tensor. + Teh default value is None. + retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter :code:`retain_graph` to True, then the grads will be retained. Thus, seting it to False is much more memory-efficient. Defaults to False. 
- - grad_tensor(Tensor, optional): initial gradient values of `outputs` . If `grad_tensor` is None, - the initial gradient values of `outputs` would be Tensor filled with 1; - if `grad_tensor` is not None, it must have the same length as `outputs`. - Default None. Returns: NoneType: None diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py index c545565d86c944..73e19197326cc2 100644 --- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py +++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py @@ -42,7 +42,7 @@ def test_all_positive(self): z_tensor = paddle.matmul(x_tensor, y_tensor) grad_tensor = paddle.to_tensor(grad) - z_tensor.backward(grad_tensor=grad_tensor) + z_tensor.backward(grad_tensor) x_grad = np.matmul(grad, y.T) From 19019708c44ec121738d7f4e616782c97c40337a Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Mon, 15 Mar 2021 08:44:38 +0000 Subject: [PATCH 07/22] format code --- paddle/fluid/pybind/imperative.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index c9952c01bcec4b..3f2bb5fbca9ba3 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -920,7 +920,8 @@ void BindImperative(py::module *m_ptr) { )DOC") .def("_run_backward", [](imperative::VarBase &self, const imperative::Tracer &tracer, - bool retain_graph, std::shared_ptr &grad_tensor) { + bool retain_graph, + std::shared_ptr &grad_tensor) { // TODO(jiabin): when we impl more backward execution we can // select them auto *engine = tracer.GetEngine(); From 55e0cfb7350f512e8d23d8b08b53827dabbf9c3f Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Mon, 15 Mar 2021 10:26:52 +0000 Subject: [PATCH 08/22] add grad_tensor to xpu --- .../fluid/dygraph/varbase_patch_methods.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index bbb001a05e2e4a..f9b1d1836683ab 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -157,6 +157,7 @@ def backward(self, grad_tensor=None, retain_graph=False): Examples: .. code-block:: python + import paddle x = paddle.to_tensor(5., stop_gradient=False) for i in range(5): y = paddle.pow(x, 4.0) @@ -175,7 +176,7 @@ def backward(self, grad_tensor=None, retain_graph=False): grad_tensor=paddle.to_tensor(2.) for i in range(5): y = paddle.pow(x, 4.0) - y.backward(grad_tensor=grad_tensor) + y.backward(grad_tensor) print("{}: {}".format(i, x.grad)) # 0: [1000.] # 1: [2000.] @@ -185,19 +186,19 @@ def backward(self, grad_tensor=None, retain_graph=False): """ if framework.in_dygraph_mode(): + if grad_tensor is not None: + assert isinstance( + grad_tensor, core. + VarBase), "The type of grad_tensot must be paddle.VarBase" + assert grad_tensor.shape == self.shape, "Variable shape not match, Variable of grad_tensor [ {} ] with shape {} mismatch Variable [ {} ] with shape {}".format( + grad_tensor.name, grad_tensor.shape, self.name, self.shape) + if paddle.is_compiled_with_xpu(): # TODO(liuyuhui): Currently only for xpu. Will be removed in the future. 
scaled_loss = scale_loss(self) scaled_loss._run_backward(framework._dygraph_tracer(), - retain_graph) + retain_graph, grad_tensor) else: - if grad_tensor is not None: - assert isinstance( - grad_tensor, core.VarBase - ), "The type of grad_tensot must be paddle.VarBase" - assert grad_tensor.shape == self.shape, "Variable shape not match, Variable of grad_tensor [ {} ] with shape {} mismatch Variable [ {} ] with shape {}".format( - grad_tensor.name, grad_tensor.shape, self.name, - self.shape) self._run_backward(framework._dygraph_tracer(), retain_graph, grad_tensor) else: From 8271dc0a4ddc33f800d7bbaf9752f11f3fd3db15 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Tue, 16 Mar 2021 06:17:08 +0000 Subject: [PATCH 09/22] modify the grad_tensor type check --- python/paddle/fluid/dygraph/varbase_patch_methods.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index f9b1d1836683ab..07fabc9cb0a0bd 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -188,9 +188,10 @@ def backward(self, grad_tensor=None, retain_graph=False): if framework.in_dygraph_mode(): if grad_tensor is not None: assert isinstance( - grad_tensor, core. - VarBase), "The type of grad_tensot must be paddle.VarBase" - assert grad_tensor.shape == self.shape, "Variable shape not match, Variable of grad_tensor [ {} ] with shape {} mismatch Variable [ {} ] with shape {}".format( + grad_tensor, paddle. + Tensor), "The type of grad_tensot must be paddle.Tensor" + assert grad_tensor.shape == self.shape, \ + "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format( grad_tensor.name, grad_tensor.shape, self.name, self.shape) if paddle.is_compiled_with_xpu(): From 5af3bd00f8f4b2e22e13388eaf5b0b4f590c21f5 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Thu, 18 Mar 2021 02:38:10 +0000 Subject: [PATCH 10/22] add paddle.backward api to support multi tensors gradient compute --- paddle/fluid/imperative/basic_engine.cc | 119 ++++++++++++++---------- paddle/fluid/imperative/basic_engine.h | 9 +- paddle/fluid/pybind/imperative.cc | 20 +++- python/paddle/fluid/dygraph/base.py | 59 +++++++++++- 4 files changed, 152 insertions(+), 55 deletions(-) diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index f2613df33cd2cf..977419f5f84617 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -36,53 +36,72 @@ DECLARE_bool(sort_sum_gradient); namespace paddle { namespace imperative { -void BasicEngine::Init(VarBase* var, bool retain_graph, VarBase* grad_tensor) { +void BasicEngine::Init( + const std::vector>& tensors, + const std::vector>& grad_tensors, + bool retain_graph, bool create_graph, + const std::vector>& inputs) { retain_graph_ = retain_graph; - init_node_ = var->GradVarBase()->GradNode(); - PADDLE_ENFORCE_EQ(var->GradVarBase()->GraphIsFreed(), false, - platform::errors::Unavailable( - "%s trying to backward through the same graph a second " - "time, but this graph have already been freed. 
Please " - "specify Tensor.backward(retain_graph=True) when " - "calling backward at the first time.", - var->Name())); - - if (!retain_graph) { - VLOG(5) << "Clear the auto-grad graph from grad var " << var->Name() - << " because of retain_graph=False when calling backward"; - var->GradVarBase()->SetGraphIsFreed(true); - var->GradVarBase()->ClearGradNode(); - } - if (init_node_ == nullptr || var->OverridedStopGradient()) { - VLOG(3) << "Skip auto grad since there is no grad op for var or loss is " - "stop_gradient=True: " - << var->Name(); - return; - } + PADDLE_ENFORCE_EQ( + tensors.size(), grad_tensors.size(), + platform::errors::Unavailable( + "the size of tensors must equal the size of grad_tensors, but" + "the size of tensors is %s, and the size of grad_tensors is %s.", + tensors.size(), grad_tensors.size())); + + for (size_t i = 0; i < tensors.size(); ++i) { + auto var = tensors[i]; + auto grad_tensor = grad_tensors[i]; + + auto init_node_ = var->GradVarBase()->GradNode(); + PADDLE_ENFORCE_EQ( + var->GradVarBase()->GraphIsFreed(), false, + platform::errors::Unavailable( + "%s trying to backward through the same graph a second " + "time, but this graph have already been freed. Please " + "specify Tensor.backward(retain_graph=True) when " + "calling backward at the first time.", + var->Name())); + + if (!retain_graph) { + VLOG(5) << "Clear the auto-grad graph from grad var " << var->Name() + << " because of retain_graph=False when calling backward"; + var->GradVarBase()->SetGraphIsFreed(true); + var->GradVarBase()->ClearGradNode(); + } - VLOG(3) << "Init first node of backward"; + if (init_node_ == nullptr || var->OverridedStopGradient()) { + VLOG(3) << "Skip auto grad since there is no grad op for var or loss is " + "stop_gradient=True: " + << var->Name(); + continue; + } - PADDLE_ENFORCE_EQ( - var->HasGradVar(), true, - platform::errors::NotFound("Grad variable not exist for variable %s", - var->Name())); - - auto& fwd_var = var->Var().Get(); - auto* grad_var = - var->GradVarBase()->MutableVar()->GetMutable(); - VLOG(6) << "init loss grad:" << var->GradVarBase()->Name() - << " as stop_gradient false"; - var->GradVarBase()->InnerSetOverridedStopGradient(false); - auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place()); - if (grad_tensor == nullptr) { - grad_var->Resize(fwd_var.dims()); - grad_var->mutable_data(fwd_var.place(), fwd_var.type()); - operators::math::set_constant(*dev_ctx, grad_var, 1.0); - } else { - paddle::framework::TensorCopy( - grad_tensor->Var().Get(), fwd_var.place(), - *dev_ctx, grad_var); + VLOG(3) << "Init node of backward"; + + PADDLE_ENFORCE_EQ( + var->HasGradVar(), true, + platform::errors::NotFound("Grad variable not exist for variable %s", + var->Name())); + + auto& fwd_var = var->Var().Get(); + auto* grad_var = + var->GradVarBase()->MutableVar()->GetMutable(); + VLOG(6) << "init loss grad:" << var->GradVarBase()->Name() + << " as stop_gradient false"; + var->GradVarBase()->InnerSetOverridedStopGradient(false); + auto* dev_ctx = + platform::DeviceContextPool::Instance().Get(fwd_var.place()); + if (grad_tensor == nullptr) { + grad_var->Resize(fwd_var.dims()); + grad_var->mutable_data(fwd_var.place(), fwd_var.type()); + operators::math::set_constant(*dev_ctx, grad_var, 1.0); + } else { + paddle::framework::TensorCopy( + grad_tensor->Var().Get(), fwd_var.place(), + *dev_ctx, grad_var); + } } } @@ -241,8 +260,10 @@ void BasicEngine::PrepareDeps() { std::queue q; std::unordered_set visited; - q.push(init_node_.get()); - 
visited.insert(init_node_.get()); + for (size_t i = 0; i < init_nodes_.size(); ++i) { + q.push(init_nodes_[i].get()); + visited.insert(init_nodes_[i].get()); + } while (!q.empty()) { auto* cur_node = q.front(); @@ -269,14 +290,16 @@ void BasicEngine::PrepareDeps() { } void BasicEngine::Execute() { - if (init_node_ == nullptr) { + if (init_nodes_.empty()) { return; } PrepareDeps(); // Start execute Computation graph std::queue> q; - q.push(std::move(init_node_)); + for (size_t i = 0; i < init_nodes_.size(); ++i) { + q.push(std::move(init_nodes_[i])); + } size_t op_num = 0; @@ -476,7 +499,7 @@ void BasicEngine::Execute() { } void BasicEngine::Clear() { - init_node_.reset(); + init_nodes_.clear(); node_deps_.clear(); accumulators_.clear(); accumulators_with_grad_node_.clear(); diff --git a/paddle/fluid/imperative/basic_engine.h b/paddle/fluid/imperative/basic_engine.h index 6a188b073b2d85..992066661c9605 100644 --- a/paddle/fluid/imperative/basic_engine.h +++ b/paddle/fluid/imperative/basic_engine.h @@ -30,8 +30,10 @@ class OpBase; class BasicEngine : public Engine { public: - void Init(VarBase* var, bool retain_graph = false, - VarBase* grad_tensor = nullptr); + void Init(const std::vector>& tensors, + const std::vector>& grad_tensors, + bool retain_graph, bool create_graph, + const std::vector>& inputs); void Execute() override; @@ -47,7 +49,7 @@ class BasicEngine : public Engine { void Clear(); private: - std::shared_ptr init_node_; + std::vector> init_nodes_; std::unordered_map node_deps_; // The input and output of Inplace op are the same. If only `var` is used // as the key, then the input and output of inplace op must be gradient @@ -75,6 +77,7 @@ class BasicEngine : public Engine { std::vector leaf_accumulators_; bool retain_graph_; + bool create_graph_; }; } // namespace imperative diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 3f2bb5fbca9ba3..402c2e83e8fadd 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -924,10 +924,10 @@ void BindImperative(py::module *m_ptr) { std::shared_ptr &grad_tensor) { // TODO(jiabin): when we impl more backward execution we can // select them - auto *engine = tracer.GetEngine(); - engine->Init(&self, retain_graph, grad_tensor.get()); + // auto *engine = tracer.GetEngine(); + // engine->Init(&self, retain_graph, grad_tensor.get()); VLOG(3) << "Start backward"; - engine->Execute(); + // engine->Execute(); VLOG(3) << "Finish backward"; }, py::call_guard()) @@ -1413,6 +1413,20 @@ void BindImperative(py::module *m_ptr) { }, py::call_guard()); + m.def( + "dygraph_run_backward", + [](const std::vector> &tensors, + const std::vector> &grad_tensors, + bool retain_graph, bool create_graph, + const std::vector> &inputs, + const imperative::Tracer &tracer) { + auto *engine = tracer.GetEngine(); + engine->Init(tensors, grad_tensors, retain_graph, create_graph, inputs); + VLOG(3) << "Start backward"; + engine->Execute(); + VLOG(3) << "Finish backward"; + }); + #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ defined(PADDLE_WITH_XPU_BKCL) py::class_ 0, "{} connot be empyt".format(name) + for each_var in in_out_list: + assert isinstance( + each_var, paddle. 
+ Tensor), "Elements of {} must be paddle.Tensor".format(name) + return in_out_list + else: + assert isinstance( + in_out_list, + paddle.Tensor), "{} must be Tensor or list of Tensor".format( + name) + return [in_out_list] + + tensors = check_tensors(tensors, "tensors") + + if grad_tensors is not None: + if not isinstance(grad_tensors, (list, tuple)): + grad_tensors = [grad_tensors] + + for each_tensor in grad_tensors: + if each_tensor is not None: + assert isinstance( + each_tensor, paddle.Tensor + ), "grad_tensors must be None, Tensor or list containing None or Tensor" + else: + grad_tensors = [] + + if len(grad_tensors) > 0: + assert len(tensors) == len( + grad_tensors), "The length of grad_tensors must be equal to tensors" + + assert isinstance(create_graph, bool), "create_graph must be True or False" + + if retain_graph is None: + retain_graph = create_graph + + assert isinstance(retain_graph, + bool), "retain_graph must be None, True or False" + + if inputs is not None: + assert len(inputs) > 0, "inputs cannot be empty list" + + core.dygraph_run_backward(tensors, grad_tensors, retain_graph, create_graph, + inputs) + + @framework.dygraph_only def to_variable(value, name=None, zero_copy=None, dtype=None): r""" From 1467feb19a8f746e29e302241d2f7ba2cc6b590c Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Thu, 18 Mar 2021 08:37:32 +0000 Subject: [PATCH 11/22] add paddle.backward api to support multi tensors gradient compute --- paddle/fluid/imperative/basic_engine.cc | 2 ++ python/paddle/fluid/dygraph/base.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index 977419f5f84617..a5f8bf26578e4a 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -102,6 +102,8 @@ void BasicEngine::Init( grad_tensor->Var().Get(), fwd_var.place(), *dev_ctx, grad_var); } + + init_nodes_.push_back(init_node_); } } diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 8f7ed202670ea0..8c580c03c5f264 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -646,7 +646,7 @@ def check_tensors(in_out_list, name): assert len(inputs) > 0, "inputs cannot be empty list" core.dygraph_run_backward(tensors, grad_tensors, retain_graph, create_graph, - inputs) + inputs, framework._dygraph_tracer()) @framework.dygraph_only From eb267fa3ec5051ab0fd5ac7b5781648990ec0789 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Fri, 19 Mar 2021 02:16:06 +0000 Subject: [PATCH 12/22] add paddle.atuograd module and backward api --- python/paddle/__init__.py | 1 + python/paddle/autograd/__init__.py | 22 ++++++++ python/paddle/autograd/backward_mode.py | 74 +++++++++++++++++++++++++ python/paddle/fluid/dygraph/base.py | 58 +------------------ 4 files changed, 98 insertions(+), 57 deletions(-) create mode 100644 python/paddle/autograd/__init__.py create mode 100644 python/paddle/autograd/backward_mode.py diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 8dabe19f57c58f..02725751cb6694 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -44,6 +44,7 @@ import paddle.device import paddle.regularizer import paddle.incubate +import paddle.autograd # TODO: define alias in tensor and framework directory diff --git a/python/paddle/autograd/__init__.py b/python/paddle/autograd/__init__.py new file mode 100644 index 00000000000000..8b3f3086a4a728 --- /dev/null +++ 
b/python/paddle/autograd/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..fluid.dygraph.base import grad #DEFINE_ALIAS + +from . import backward_mode +from .backward_mode import backward + +__all__ = ['grad'] + +__all__ += backward_mode.__all__ diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py new file mode 100644 index 00000000000000..a045c7b2840ffc --- /dev/null +++ b/python/paddle/autograd/backward_mode.py @@ -0,0 +1,74 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.fluid import core +from paddle.fluid import framework +import paddle +__all__ = ['backward'] + + +@framework.dygraph_only +def backward(tensors, + grad_tensors, + retain_graph=None, + create_graph=False, + inputs=None): + def check_tensors(in_out_list, name): + assert in_out_list is not None, "{} should not be None".format(name) + + if isinstance(in_out_list, (list, tuple)): + assert len(in_out_list) > 0, "{} connot be empyt".format(name) + for each_var in in_out_list: + assert isinstance( + each_var, paddle. 
+ Tensor), "Elements of {} must be paddle.Tensor".format(name) + return in_out_list + else: + assert isinstance( + in_out_list, + paddle.Tensor), "{} must be Tensor or list of Tensor".format( + name) + return [in_out_list] + + tensors = check_tensors(tensors, "tensors") + + if grad_tensors is not None: + if not isinstance(grad_tensors, (list, tuple)): + grad_tensors = [grad_tensors] + + for each_tensor in grad_tensors: + if each_tensor is not None: + assert isinstance( + each_tensor, paddle.Tensor + ), "grad_tensors must be None, Tensor or list containing None or Tensor" + else: + grad_tensors = [] + + if len(grad_tensors) > 0: + assert len(tensors) == len( + grad_tensors), "The length of grad_tensors must be equal to tensors" + + assert isinstance(create_graph, bool), "create_graph must be True or False" + + if retain_graph is None: + retain_graph = create_graph + + assert isinstance(retain_graph, + bool), "retain_graph must be None, True or False" + + if inputs is not None: + assert len(inputs) > 0, "inputs cannot be empty list" + + core.dygraph_run_backward(tensors, grad_tensors, retain_graph, create_graph, + inputs, framework._dygraph_tracer()) diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 8c580c03c5f264..be5d9ac58311b5 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -30,7 +30,7 @@ __all__ = [ 'no_grad', 'no_grad_', 'grad', 'guard', 'enable_dygraph', 'disable_dygraph', - 'enabled', 'to_variable', 'backward' + 'enabled', 'to_variable' ] @@ -593,62 +593,6 @@ def check_in_out(in_out_list, name): retain_graph, allow_unused, only_inputs) -@framework.dygraph_only -def backward(tensors, - grad_tensors, - retain_graph=None, - create_graph=False, - inputs=None): - def check_tensors(in_out_list, name): - assert in_out_list is not None, "{} should not be None".format(name) - - if isinstance(in_out_list, (list, tuple)): - assert len(in_out_list) > 0, "{} connot be empyt".format(name) - for each_var in in_out_list: - assert isinstance( - each_var, paddle. 
- Tensor), "Elements of {} must be paddle.Tensor".format(name) - return in_out_list - else: - assert isinstance( - in_out_list, - paddle.Tensor), "{} must be Tensor or list of Tensor".format( - name) - return [in_out_list] - - tensors = check_tensors(tensors, "tensors") - - if grad_tensors is not None: - if not isinstance(grad_tensors, (list, tuple)): - grad_tensors = [grad_tensors] - - for each_tensor in grad_tensors: - if each_tensor is not None: - assert isinstance( - each_tensor, paddle.Tensor - ), "grad_tensors must be None, Tensor or list containing None or Tensor" - else: - grad_tensors = [] - - if len(grad_tensors) > 0: - assert len(tensors) == len( - grad_tensors), "The length of grad_tensors must be equal to tensors" - - assert isinstance(create_graph, bool), "create_graph must be True or False" - - if retain_graph is None: - retain_graph = create_graph - - assert isinstance(retain_graph, - bool), "retain_graph must be None, True or False" - - if inputs is not None: - assert len(inputs) > 0, "inputs cannot be empty list" - - core.dygraph_run_backward(tensors, grad_tensors, retain_graph, create_graph, - inputs, framework._dygraph_tracer()) - - @framework.dygraph_only def to_variable(value, name=None, zero_copy=None, dtype=None): r""" From 2bb8f3cea4eb9d2fbaa04221441274bbe95f6116 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Tue, 23 Mar 2021 07:52:33 +0000 Subject: [PATCH 13/22] change tensor.backward func args --- paddle/fluid/pybind/imperative.cc | 15 ++++++---- python/paddle/autograd/backward_mode.py | 2 ++ .../tests/unittests/test_custom_grad_input.py | 30 +++++++++++++++++-- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 402c2e83e8fadd..db000a94c2567e 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -919,15 +919,17 @@ void BindImperative(py::module *m_ptr) { print(x.grad) # None )DOC") .def("_run_backward", - [](imperative::VarBase &self, const imperative::Tracer &tracer, + [](std::vector> &self, const imperative::Tracer &tracer, bool retain_graph, - std::shared_ptr &grad_tensor) { + std::vector> &grad_tensor) { // TODO(jiabin): when we impl more backward execution we can // select them - // auto *engine = tracer.GetEngine(); - // engine->Init(&self, retain_graph, grad_tensor.get()); + std::vector> inputs; + + auto *engine = tracer.GetEngine(); + engine->Init(self, grad_tensor, retain_graph, false, inputs); VLOG(3) << "Start backward"; - // engine->Execute(); + engine->Execute(); VLOG(3) << "Finish backward"; }, py::call_guard()) @@ -1425,7 +1427,8 @@ void BindImperative(py::module *m_ptr) { VLOG(3) << "Start backward"; engine->Execute(); VLOG(3) << "Finish backward"; - }); + }, + py::call_guard()); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ defined(PADDLE_WITH_XPU_BKCL) diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py index a045c7b2840ffc..556763defc0d90 100644 --- a/python/paddle/autograd/backward_mode.py +++ b/python/paddle/autograd/backward_mode.py @@ -69,6 +69,8 @@ def check_tensors(in_out_list, name): if inputs is not None: assert len(inputs) > 0, "inputs cannot be empty list" + else: + inputs = [] core.dygraph_run_backward(tensors, grad_tensors, retain_graph, create_graph, inputs, framework._dygraph_tracer()) diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py index 
73e19197326cc2..d8efca1f25a325 100644 --- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py +++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py @@ -22,14 +22,14 @@ from op_test import OpTest -class TestBackward(unittest.TestCase): +class TestTensorBackward(unittest.TestCase): def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] if paddle.is_compiled_with_cuda(): self._places.append(paddle.CUDAPlace(0)) - def test_all_positive(self): + def test_tensor_backward(self): for dtype in self._dtypes: x = np.random.random([2, 100]).astype(dtype) y = np.random.random([100, 2]).astype(dtype) @@ -48,6 +48,32 @@ def test_all_positive(self): self.assertTrue(np.allclose(x_grad, x_tensor.grad)) +class TestBackwardAPI(unittest.TestCase): + def setUp(self): + self._dtypes = ["float32", "float64"] + self._places = [paddle.CPUPlace()] + if paddle.is_compiled_with_cuda(): + self._places.append(paddle.CUDAPlace(0)) + + def test_backward_api(self): + for dtype in self._dtypes: + x = np.random.random([2, 2]).astype(dtype) + y = np.random.random([2, 2]).astype(dtype) + z = np.matmul(x, y) + grad = np.random.random(z.shape).astype(dtype) + for place in self._places: + with dg.guard(place): + x_tensor = paddle.to_tensor(x, stop_gradient=False) + y_tensor = paddle.to_tensor(y) + z_tensor = paddle.matmul(x_tensor, y_tensor) + + grad_tensor = paddle.to_tensor(grad) + paddle.autograd.backward([z_tensor, z_tensor], [grad_tensor, grad_tensor], True) + + x_grad = np.matmul(grad, y.T) + + self.assertTrue(np.allclose(x_grad*2, x_tensor.grad)) + if __name__ == '__main__': unittest.main() From 41b375fb81f6ccd1164602256e018d483ef7fc7c Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Tue, 23 Mar 2021 09:28:05 +0000 Subject: [PATCH 14/22] modify tensor backward api --- paddle/fluid/pybind/imperative.cc | 14 +++++++++----- .../tests/unittests/test_custom_grad_input.py | 6 ++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index db000a94c2567e..52008abce2193e 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -720,6 +720,7 @@ void BindImperative(py::module *m_ptr) { Bump the version whenever the Tensor is modified through an inplace operation. 
)DOC") .def("numpy", + [](imperative::VarBase &self) -> py::array { const auto &tensor = self.MutableVar()->Get(); @@ -919,15 +920,18 @@ void BindImperative(py::module *m_ptr) { print(x.grad) # None )DOC") .def("_run_backward", - [](std::vector> &self, const imperative::Tracer &tracer, - bool retain_graph, - std::vector> &grad_tensor) { + [](std::shared_ptr &self, + const imperative::Tracer &tracer, bool retain_graph, + std::shared_ptr &grad_tensor) { // TODO(jiabin): when we impl more backward execution we can // select them + std::vector> tensors{self}; + std::vector> grad_tensors{ + grad_tensor}; std::vector> inputs; - + auto *engine = tracer.GetEngine(); - engine->Init(self, grad_tensor, retain_graph, false, inputs); + engine->Init(tensors, grad_tensors, retain_graph, false, inputs); VLOG(3) << "Start backward"; engine->Execute(); VLOG(3) << "Finish backward"; diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py index d8efca1f25a325..b6b496b8be8ed6 100644 --- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py +++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py @@ -48,6 +48,7 @@ def test_tensor_backward(self): self.assertTrue(np.allclose(x_grad, x_tensor.grad)) + class TestBackwardAPI(unittest.TestCase): def setUp(self): self._dtypes = ["float32", "float64"] @@ -68,11 +69,12 @@ def test_backward_api(self): z_tensor = paddle.matmul(x_tensor, y_tensor) grad_tensor = paddle.to_tensor(grad) - paddle.autograd.backward([z_tensor, z_tensor], [grad_tensor, grad_tensor], True) + paddle.autograd.backward([z_tensor, z_tensor], + [grad_tensor, grad_tensor], True) x_grad = np.matmul(grad, y.T) - self.assertTrue(np.allclose(x_grad*2, x_tensor.grad)) + self.assertTrue(np.allclose(x_grad * 2, x_tensor.grad)) if __name__ == '__main__': From 6974e5c838c6536fcee5e5a4c4731045d1ff1bf2 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Tue, 23 Mar 2021 09:54:42 +0000 Subject: [PATCH 15/22] remove create_graph intputs args --- paddle/fluid/imperative/basic_engine.cc | 3 +-- paddle/fluid/imperative/basic_engine.h | 3 +-- paddle/fluid/pybind/imperative.cc | 9 +++------ python/paddle/autograd/backward_mode.py | 23 ++++------------------- 4 files changed, 9 insertions(+), 29 deletions(-) diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index a5f8bf26578e4a..5902376560f09b 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -39,8 +39,7 @@ namespace imperative { void BasicEngine::Init( const std::vector>& tensors, const std::vector>& grad_tensors, - bool retain_graph, bool create_graph, - const std::vector>& inputs) { + bool retain_graph) { retain_graph_ = retain_graph; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/imperative/basic_engine.h b/paddle/fluid/imperative/basic_engine.h index 992066661c9605..d4c0ae84191f97 100644 --- a/paddle/fluid/imperative/basic_engine.h +++ b/paddle/fluid/imperative/basic_engine.h @@ -32,8 +32,7 @@ class BasicEngine : public Engine { public: void Init(const std::vector>& tensors, const std::vector>& grad_tensors, - bool retain_graph, bool create_graph, - const std::vector>& inputs); + bool retain_graph); void Execute() override; diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 52008abce2193e..c16999d1fc52dd 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -928,10 +928,9 @@ void 
BindImperative(py::module *m_ptr) { std::vector> tensors{self}; std::vector> grad_tensors{ grad_tensor}; - std::vector> inputs; auto *engine = tracer.GetEngine(); - engine->Init(tensors, grad_tensors, retain_graph, false, inputs); + engine->Init(tensors, grad_tensors, retain_graph); VLOG(3) << "Start backward"; engine->Execute(); VLOG(3) << "Finish backward"; @@ -1423,11 +1422,9 @@ void BindImperative(py::module *m_ptr) { "dygraph_run_backward", [](const std::vector> &tensors, const std::vector> &grad_tensors, - bool retain_graph, bool create_graph, - const std::vector> &inputs, - const imperative::Tracer &tracer) { + bool retain_graph, const imperative::Tracer &tracer) { auto *engine = tracer.GetEngine(); - engine->Init(tensors, grad_tensors, retain_graph, create_graph, inputs); + engine->Init(tensors, grad_tensors, retain_graph); VLOG(3) << "Start backward"; engine->Execute(); VLOG(3) << "Finish backward"; diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py index 556763defc0d90..f274cab3d75279 100644 --- a/python/paddle/autograd/backward_mode.py +++ b/python/paddle/autograd/backward_mode.py @@ -19,11 +19,7 @@ @framework.dygraph_only -def backward(tensors, - grad_tensors, - retain_graph=None, - create_graph=False, - inputs=None): +def backward(tensors, grad_tensors, retain_graph=False): def check_tensors(in_out_list, name): assert in_out_list is not None, "{} should not be None".format(name) @@ -59,18 +55,7 @@ def check_tensors(in_out_list, name): assert len(tensors) == len( grad_tensors), "The length of grad_tensors must be equal to tensors" - assert isinstance(create_graph, bool), "create_graph must be True or False" + assert isinstance(retain_graph, bool), "retain_graph must be True or False" - if retain_graph is None: - retain_graph = create_graph - - assert isinstance(retain_graph, - bool), "retain_graph must be None, True or False" - - if inputs is not None: - assert len(inputs) > 0, "inputs cannot be empty list" - else: - inputs = [] - - core.dygraph_run_backward(tensors, grad_tensors, retain_graph, create_graph, - inputs, framework._dygraph_tracer()) + core.dygraph_run_backward(tensors, grad_tensors, retain_graph, + framework._dygraph_tracer()) From 1e3e9759a7d61c2af0ae972ea311a3c71ea1185a Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Wed, 24 Mar 2021 03:27:35 +0000 Subject: [PATCH 16/22] add doc and examplex code for backward api --- python/paddle/autograd/backward_mode.py | 45 +++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py index f274cab3d75279..2d44a65f7afae7 100644 --- a/python/paddle/autograd/backward_mode.py +++ b/python/paddle/autograd/backward_mode.py @@ -19,7 +19,48 @@ @framework.dygraph_only -def backward(tensors, grad_tensors, retain_graph=False): +def backward(tensors, grad_tensors=None, retain_graph=False): + """ + Compute the backward gradients of given tensors. + + Args: + tensors(list of Tensors): the tensors which the gradient to be computed. + + grad_tensors(list of Tensors of None, optional): the init gradients of the `tensors`` .If not None, it must have the same length with ``tensors`` , + and if any of the elements is None, then the init gradient is the default value which is filled with 1.0. + If None, all the gradients of the ``tensors`` is the default value which is filled with 1.0. + + retain_graph(bool, optional): If False, the graph used to compute grads will be freed. 
If you would + like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter + :code:`retain_graph` to True, then the grads will be retained. Thus, seting it to False is much more memory-efficient. + Defaults to False. + + Returns: + NoneType: Non + + + Examples: + .. code-block:: python + + import paddle + x = paddle.to_tensor([[1, 2], [3, 4]], dtype='float32', stop_gradient=False) + y = paddle.to_tensor([[3, 2], [3, 4]], dtype='float32') + + grad_tensor = paddle.to_tensor([[1,2], [1, 1]], dtype='float32') + + z = paddle.matmul(x, y) + #Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=False, + # [[9. , 10.], + # [21., 22.]]) + + paddle.autograd.backward([z, z], [grad_tensor, grad_tensor], True) + + print(x.grad) + #[[14. 22.] + # [10. 14.]]] + + """ + def check_tensors(in_out_list, name): assert in_out_list is not None, "{} should not be None".format(name) @@ -49,7 +90,7 @@ def check_tensors(in_out_list, name): each_tensor, paddle.Tensor ), "grad_tensors must be None, Tensor or list containing None or Tensor" else: - grad_tensors = [] + grad_tensors = [None] * len(tensors) if len(grad_tensors) > 0: assert len(tensors) == len( From c7de011fda6b0808c2abea47ed3eff8cd1d386c5 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Wed, 24 Mar 2021 06:34:35 +0000 Subject: [PATCH 17/22] when have the same tensor, throw error --- python/paddle/autograd/backward_mode.py | 41 +++++++++++++------ .../tests/unittests/test_custom_grad_input.py | 5 ++- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py index 2d44a65f7afae7..a3e211893c510d 100644 --- a/python/paddle/autograd/backward_mode.py +++ b/python/paddle/autograd/backward_mode.py @@ -24,7 +24,7 @@ def backward(tensors, grad_tensors=None, retain_graph=False): Compute the backward gradients of given tensors. Args: - tensors(list of Tensors): the tensors which the gradient to be computed. + tensors(list of Tensors): the tensors which the gradient to be computed. The tensors can not contain the same tensor. grad_tensors(list of Tensors of None, optional): the init gradients of the `tensors`` .If not None, it must have the same length with ``tensors`` , and if any of the elements is None, then the init gradient is the default value which is filled with 1.0. @@ -42,22 +42,34 @@ def backward(tensors, grad_tensors=None, retain_graph=False): Examples: .. code-block:: python - import paddle - x = paddle.to_tensor([[1, 2], [3, 4]], dtype='float32', stop_gradient=False) - y = paddle.to_tensor([[3, 2], [3, 4]], dtype='float32') + import paddle + x = paddle.to_tensor([[1, 2], [3, 4]], dtype='float32', stop_gradient=False) + y = paddle.to_tensor([[3, 2], [3, 4]], dtype='float32') - grad_tensor = paddle.to_tensor([[1,2], [1, 1]], dtype='float32') + grad_tensor1 = paddle.to_tensor([[1,2], [2, 3]], dtype='float32') + grad_tensor2 = paddle.to_tensor([[1,1], [1, 1]], dtype='float32') - z = paddle.matmul(x, y) - #Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=False, - # [[9. , 10.], - # [21., 22.]]) + z1 = paddle.matmul(x, y) + z2 = paddle.matmul(x, y) - paddle.autograd.backward([z, z], [grad_tensor, grad_tensor], True) + paddle.autograd.backward([z1, z2], [grad_tensor1, grad_tensor2], True) + print(x.grad) + #[[12. 18.] + # [17. 25.]] - print(x.grad) - #[[14. 22.] - # [10. 14.]]] + x.clear_grad() + + paddle.autograd.backward([z1, z2], [grad_tensor1, None], True) + print(x.grad) + #[[12. 18.] 
+ # [17. 25.]] + + x.clear_grad() + + paddle.autograd.backward([z1, z2]) + print(x.grad) + #[[10. 14.] + # [10. 14.]] """ @@ -80,6 +92,9 @@ def check_tensors(in_out_list, name): tensors = check_tensors(tensors, "tensors") + assert len(tensors) == len(set( + tensors)), "the arg tensors should not contains same element" + if grad_tensors is not None: if not isinstance(grad_tensors, (list, tuple)): grad_tensors = [grad_tensors] diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py index b6b496b8be8ed6..ddd92138ec2b8d 100644 --- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py +++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py @@ -66,10 +66,11 @@ def test_backward_api(self): with dg.guard(place): x_tensor = paddle.to_tensor(x, stop_gradient=False) y_tensor = paddle.to_tensor(y) - z_tensor = paddle.matmul(x_tensor, y_tensor) + z_tensor1 = paddle.matmul(x_tensor, y_tensor) + z_tensor2 = paddle.matmul(x_tensor, y_tensor) grad_tensor = paddle.to_tensor(grad) - paddle.autograd.backward([z_tensor, z_tensor], + paddle.autograd.backward([z_tensor1, z_tensor2], [grad_tensor, grad_tensor], True) x_grad = np.matmul(grad, y.T) From 2f2824ce955d0ff1be9696746874ff18fb4ff670 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Wed, 24 Mar 2021 07:40:43 +0000 Subject: [PATCH 18/22] modify test Init func args --- paddle/fluid/imperative/basic_engine.h | 2 +- paddle/fluid/imperative/tests/test_hooks.cc | 8 ++++++-- paddle/fluid/imperative/tests/test_tracer.cc | 9 +++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/imperative/basic_engine.h b/paddle/fluid/imperative/basic_engine.h index d4c0ae84191f97..ee245270270e3a 100644 --- a/paddle/fluid/imperative/basic_engine.h +++ b/paddle/fluid/imperative/basic_engine.h @@ -32,7 +32,7 @@ class BasicEngine : public Engine { public: void Init(const std::vector>& tensors, const std::vector>& grad_tensors, - bool retain_graph); + bool retain_graph = false); void Execute() override; diff --git a/paddle/fluid/imperative/tests/test_hooks.cc b/paddle/fluid/imperative/tests/test_hooks.cc index 7bf5f876681bab..56d2af75f8602b 100644 --- a/paddle/fluid/imperative/tests/test_hooks.cc +++ b/paddle/fluid/imperative/tests/test_hooks.cc @@ -93,8 +93,10 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) { ASSERT_EQ(out->GradVarBase()->GradOpNum(), 1UL); // 3. backward + std::vector> tensors{out}; + std::vector> grad_tensors { nullptr } BasicEngine engine; - engine.Init(out.get()); + engine.Init(tensors, grad_tensors); engine.Execute(); framework::LoDTensor x_grad; @@ -193,8 +195,10 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() { ASSERT_EQ(out->GradVarBase()->GradOpNum(), 1UL); // 3. 
From 8415df41fd6bca9b951060ab5cdc6678a5055b4c Mon Sep 17 00:00:00 2001
From: chentianyu03
Date: Wed, 24 Mar 2021 08:18:51 +0000
Subject: [PATCH 19/22] modify the execute.Init func args in test files

---
 paddle/fluid/imperative/tests/test_hooks.cc | 4 ++--
 python/paddle/autograd/backward_mode.py     | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/imperative/tests/test_hooks.cc b/paddle/fluid/imperative/tests/test_hooks.cc
index 56d2af75f8602b..0e538bd44d9867 100644
--- a/paddle/fluid/imperative/tests/test_hooks.cc
+++ b/paddle/fluid/imperative/tests/test_hooks.cc
@@ -94,7 +94,7 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) {
   // 3. backward
   std::vector<std::shared_ptr<VarBase>> tensors{out};
-  std::vector<std::shared_ptr<VarBase>> grad_tensors { nullptr }
+  std::vector<std::shared_ptr<VarBase>> grad_tensors{nullptr};
   BasicEngine engine;
   engine.Init(tensors, grad_tensors);
@@ -196,7 +196,7 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() {
   // 3. backward
   std::vector<std::shared_ptr<VarBase>> tensors{out};
-  std::vector<std::shared_ptr<VarBase>> grad_tensors { nullptr }
+  std::vector<std::shared_ptr<VarBase>> grad_tensors{nullptr};
   BasicEngine engine;
   engine.Init(tensors, grad_tensors);
   engine.Execute();

diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py
index a3e211893c510d..ac19ef3ff102d6 100644
--- a/python/paddle/autograd/backward_mode.py
+++ b/python/paddle/autograd/backward_mode.py
@@ -29,6 +29,7 @@ def backward(tensors, grad_tensors=None, retain_graph=False):
         grad_tensors(list of Tensors or None, optional): the initial gradients of ``tensors``. If not None,
             it must have the same length as ``tensors``, and if any of the elements is None,
             then the initial gradient is the default value which is filled with 1.0.
             If None, all the gradients of ``tensors`` are the default value which is filled with 1.0.
+            Defaults to None.
         retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
             like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter
             :code:`retain_graph` to True, then the grads will be retained. Thus, setting it to False is much more memory-efficient.
             Defaults to False.
 
     Returns:
-        NoneType: Non
+        NoneType: None
 
 
     Examples:
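Since the docstring now spells out both defaults, a short sketch of how `retain_graph` interacts with repeated calls may help; this is illustrative only and assumes a dygraph-mode build of this branch.

    import paddle

    x = paddle.to_tensor([2.], stop_gradient=False)
    y = x * x

    # Keep the graph alive so it can be traversed a second time.
    paddle.autograd.backward([y], retain_graph=True)
    print(x.grad)  # [4.]

    # Legal only because the first call retained the graph; with the
    # default retain_graph=False the graph would already be freed.
    paddle.autograd.backward([y])
    print(x.grad)  # [8.]  (gradients accumulate)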
From be065e4b2fcad351f54aa67c21588c3c2a618c9c Mon Sep 17 00:00:00 2001
From: chentianyu03
Date: Wed, 24 Mar 2021 08:57:18 +0000
Subject: [PATCH 20/22] add paddle.autograd package in setup.py.in

---
 python/setup.py.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/setup.py.in b/python/setup.py.in
index 64cfe6e9ccff74..0114b4c223a5f8 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -216,6 +216,7 @@ packages=['paddle',
           'paddle.static.amp',
           'paddle.tensor',
           'paddle.onnx',
+          'paddle.autograd',
          ]
 
 with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
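Without this setup.py.in entry the `paddle.autograd` subpackage would be missing from installed wheels even though it imports fine from a source tree. A one-line smoke test against an installed build (illustrative):

    # Verify the subpackage ships with the wheel and exposes backward().
    import paddle.autograd

    print(hasattr(paddle.autograd, "backward"))  # True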
From 7f8e58c46798c0f05fe8814be0b127763e221fd9 Mon Sep 17 00:00:00 2001
From: chentianyu03
Date: Mon, 29 Mar 2021 03:57:59 +0000
Subject: [PATCH 21/22] modify error msg, remove _run_backward method in class
 Tensor

---
 paddle/fluid/imperative/basic_engine.cc            | 13 ++++++-------
 paddle/fluid/imperative/basic_engine.h             |  1 -
 paddle/fluid/pybind/imperative.cc                  | 17 -----------------
 python/paddle/autograd/backward_mode.py            |  7 ++++---
 .../fluid/dygraph/varbase_patch_methods.py         |  9 +++++----
 5 files changed, 15 insertions(+), 32 deletions(-)

diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index 5902376560f09b..6601916d9d583e 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -45,15 +45,15 @@ void BasicEngine::Init(
   PADDLE_ENFORCE_EQ(
       tensors.size(), grad_tensors.size(),
       platform::errors::Unavailable(
-          "the size of tensors must equal the size of grad_tensors, but"
-          "the size of tensors is %s, and the size of grad_tensors is %s.",
+          "The size of tensors does not equal the size of grad_tensors, "
+          "the size of tensors is %s, but the size of grad_tensors is %s.",
           tensors.size(), grad_tensors.size()));
 
   for (size_t i = 0; i < tensors.size(); ++i) {
     auto var = tensors[i];
     auto grad_tensor = grad_tensors[i];
 
-    auto init_node_ = var->GradVarBase()->GradNode();
+    auto init_node = var->GradVarBase()->GradNode();
     PADDLE_ENFORCE_EQ(
         var->GradVarBase()->GraphIsFreed(), false,
         platform::errors::Unavailable(
@@ -70,7 +70,7 @@ void BasicEngine::Init(
       var->GradVarBase()->ClearGradNode();
     }
 
-    if (init_node_ == nullptr || var->OverridedStopGradient()) {
+    if (init_node == nullptr || var->OverridedStopGradient()) {
       VLOG(3) << "Skip auto grad since there is no grad op for var or loss is "
                  "stop_gradient=True: "
               << var->Name();
@@ -81,8 +81,7 @@ void BasicEngine::Init(
 
     PADDLE_ENFORCE_EQ(
         var->HasGradVar(), true,
-        platform::errors::NotFound("Grad variable not exist for variable %s",
-                                   var->Name()));
+        platform::errors::NotFound("Tensor %s has no gradient", var->Name()));
 
     auto& fwd_var = var->Var().Get<framework::LoDTensor>();
     auto* grad_var =
@@ -102,7 +101,7 @@ void BasicEngine::Init(
           *dev_ctx, grad_var);
     }
 
-    init_nodes_.push_back(init_node_);
+    init_nodes_.push_back(init_node);
   }
 }

diff --git a/paddle/fluid/imperative/basic_engine.h b/paddle/fluid/imperative/basic_engine.h
index ee245270270e3a..49761a8df0b6b1 100644
--- a/paddle/fluid/imperative/basic_engine.h
+++ b/paddle/fluid/imperative/basic_engine.h
@@ -76,7 +76,6 @@ class BasicEngine : public Engine {
   std::vector<GradientAccumulator*> leaf_accumulators_;
 
   bool retain_graph_;
-  bool create_graph_;
 };
 
 }  // namespace imperative

diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index c16999d1fc52dd..501efa0e83c89f 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -919,23 +919,6 @@ void BindImperative(py::module *m_ptr) {
           print(x.stop_gradient) # True
           print(x.grad)          # None
       )DOC")
-      .def("_run_backward",
-           [](std::shared_ptr<imperative::VarBase> &self,
-              const imperative::Tracer &tracer, bool retain_graph,
-              std::shared_ptr<imperative::VarBase> &grad_tensor) {
-             // TODO(jiabin): when we impl more backward execution we can
-             // select them
-             std::vector<std::shared_ptr<imperative::VarBase>> tensors{self};
-             std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{
-                 grad_tensor};
-
-             auto *engine = tracer.GetEngine();
-             engine->Init(tensors, grad_tensors, retain_graph);
-             VLOG(3) << "Start backward";
-             engine->Execute();
-             VLOG(3) << "Finish backward";
-           },
-           py::call_guard<py::gil_scoped_release>())
       .def("_grad_name", &imperative::VarBase::GradVarName)
       .def("_grad_value",
           [](imperative::VarBase &self) {

diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py
index ac19ef3ff102d6..96e4336abaa6fa 100644
--- a/python/paddle/autograd/backward_mode.py
+++ b/python/paddle/autograd/backward_mode.py
@@ -93,8 +93,9 @@ def check_tensors(in_out_list, name):
 
     tensors = check_tensors(tensors, "tensors")
 
-    assert len(tensors) == len(set(
-        tensors)), "the arg tensors should not contains same element"
+    assert len(tensors) == len(
+        set(tensors)
+    ), "The argument 'tensors' of paddle.autograd.backward contains duplicate paddle.Tensor objects."
 
     if grad_tensors is not None:
         if not isinstance(grad_tensors, (list, tuple)):
             grad_tensors = [grad_tensors]
@@ -104,7 +105,7 @@ def check_tensors(in_out_list, name):
             if each_tensor is not None:
                 assert isinstance(
                     each_tensor, paddle.Tensor
-                ), "grad_tensors must be None, Tensor or list containing None or Tensor"
+                ), "The argument 'grad_tensors' of paddle.autograd.backward is invalid, it can be 'None', 'paddle.Tensor' or 'list[None/paddle.Tensor]'."
         else:
             grad_tensors = [None] * len(tensors)

diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index 07fabc9cb0a0bd..42479d07c20eac 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -197,11 +197,12 @@ def backward(self, grad_tensor=None, retain_graph=False):
             if paddle.is_compiled_with_xpu():
                 # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
                 scaled_loss = scale_loss(self)
-                scaled_loss._run_backward(framework._dygraph_tracer(),
-                                          retain_graph, grad_tensor)
+                core.dygraph_run_backward([scaled_loss], [grad_tensor],
+                                          retain_graph,
+                                          framework._dygraph_tracer())
             else:
-                self._run_backward(framework._dygraph_tracer(), retain_graph,
-                                   grad_tensor)
+                core.dygraph_run_backward([self], [grad_tensor], retain_graph,
+                                          framework._dygraph_tracer())
         else:
             raise ValueError(
                 "Variable.backward() is only available in DyGraph mode")
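After this refactor the pybind-level `_run_backward` method is gone and both user-facing entry points funnel into `core.dygraph_run_backward`. The public behavior is unchanged; for example, `Tensor.backward` with a custom initial gradient still works as before. A minimal sketch, assuming dygraph mode on this branch:

    import paddle

    x = paddle.to_tensor([1., 2., 3.], stop_gradient=False)
    y = x * x

    # dL/dy is supplied explicitly, so x.grad becomes grad_tensor * 2x.
    y.backward(grad_tensor=paddle.to_tensor([1., 2., 3.]))
    print(x.grad)  # [2., 8., 18.]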
From 0374c0bb25fe3cc327b0a7b1e5059342d6eb2691 Mon Sep 17 00:00:00 2001
From: chentianyu03
Date: Tue, 30 Mar 2021 02:21:29 +0000
Subject: [PATCH 22/22] add test cases for backward api

---
 .../tests/unittests/test_custom_grad_input.py | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
index ddd92138ec2b8d..a7472e7ffd7609 100644
--- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
+++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
@@ -77,6 +77,43 @@ def test_backward_api(self):
 
             self.assertTrue(np.allclose(x_grad * 2, x_tensor.grad))
 
+    def test_backward_single_tensor(self):
+        for dtype in self._dtypes:
+            x = np.random.random([2, 2]).astype(dtype)
+            y = np.random.random([2, 2]).astype(dtype)
+            z = np.matmul(x, y)
+            grad = np.random.random(z.shape).astype(dtype)
+            for place in self._places:
+                with dg.guard(place):
+                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
+                    y_tensor = paddle.to_tensor(y)
+                    z_tensor1 = paddle.matmul(x_tensor, y_tensor)
+
+                    grad_tensor = paddle.to_tensor(grad)
+                    paddle.autograd.backward(z_tensor1, grad_tensor, True)
+
+                    x_grad = np.matmul(grad, y.T)
+
+                    self.assertTrue(np.allclose(x_grad, x_tensor.grad))
+
+    def test_backward_none_grad_tensor(self):
+        for dtype in self._dtypes:
+            x = np.random.random([2, 2]).astype(dtype)
+            y = np.random.random([2, 2]).astype(dtype)
+            z = np.matmul(x, y)
+            grad = np.ones(z.shape).astype(dtype)
+            for place in self._places:
+                with dg.guard(place):
+                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
+                    y_tensor = paddle.to_tensor(y)
+                    z_tensor1 = paddle.matmul(x_tensor, y_tensor)
+
+                    paddle.autograd.backward(z_tensor1, None)
+
+                    x_grad = np.matmul(grad, y.T)
+
+                    self.assertTrue(np.allclose(x_grad, x_tensor.grad))
+
 if __name__ == '__main__':
     unittest.main()
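The two new cases pin down the API's convenience forms: a bare Tensor is accepted where a list is documented, and a None gradient behaves like an all-ones initial gradient. A condensed, self-contained version of what they assert (assuming a dygraph-mode build of this branch):

    import numpy as np
    import paddle

    x = np.random.random([2, 2]).astype("float32")
    y = np.random.random([2, 2]).astype("float32")

    x_t = paddle.to_tensor(x, stop_gradient=False)
    y_t = paddle.to_tensor(y)
    z_t = paddle.matmul(x_t, y_t)

    # A single Tensor (not a list) with a None gradient: the initial
    # gradient defaults to ones, so x.grad == ones @ y^T.
    paddle.autograd.backward(z_t, None)

    expected = np.matmul(np.ones([2, 2], dtype="float32"), y.T)
    print(np.allclose(expected, x_t.grad))  # True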