Merged

Commits (23)
d0915f8  add custom init grad for backward function (MingMingShangTian, Mar 11, 2021)
0bccce6  add custom init grad for backward function (MingMingShangTian, Mar 11, 2021)
5dac8e9  handle when the grad_tensor is none (MingMingShangTian, Mar 12, 2021)
ef4c7b9  handle when the grad_tensor is none (MingMingShangTian, Mar 12, 2021)
33b0416  fix the args type error on windows platform (MingMingShangTian, Mar 15, 2021)
837e26b  modify the args order and doc (MingMingShangTian, Mar 15, 2021)
1901970  format code (MingMingShangTian, Mar 15, 2021)
55e0cfb  add grad_tensor to xpu (MingMingShangTian, Mar 15, 2021)
8271dc0  modify the grad_tensor type check (MingMingShangTian, Mar 16, 2021)
5af3bd0  add paddle.backward api to support multi tensors gradient compute (MingMingShangTian, Mar 18, 2021)
1467feb  add paddle.backward api to support multi tensors gradient compute (MingMingShangTian, Mar 18, 2021)
eb267fa  add paddle.atuograd module and backward api (MingMingShangTian, Mar 19, 2021)
b80f449  Merge branch 'develop' into custom_staring_grad (MingMingShangTian, Mar 23, 2021)
2bb8f3c  change tensor.backward func args (MingMingShangTian, Mar 23, 2021)
41b375f  modify tensor backward api (MingMingShangTian, Mar 23, 2021)
6974e5c  remove create_graph intputs args (MingMingShangTian, Mar 23, 2021)
1e3e975  add doc and examplex code for backward api (MingMingShangTian, Mar 24, 2021)
c7de011  when have the same tensor, throw error (MingMingShangTian, Mar 24, 2021)
2f2824c  modify test Init func args (MingMingShangTian, Mar 24, 2021)
8415df4  modify the execute.Init func args in test files (MingMingShangTian, Mar 24, 2021)
be065e4  add paddle.autograd package in setup.py.in (MingMingShangTian, Mar 24, 2021)
7f8e58c  modify error msg, remove _run_backward method in class Tensor (MingMingShangTian, Mar 29, 2021)
0374c0b  add test cases for backward api (MingMingShangTian, Mar 30, 2021)
14 changes: 10 additions & 4 deletions paddle/fluid/imperative/basic_engine.cc
@@ -36,7 +36,7 @@ DECLARE_bool(sort_sum_gradient);
namespace paddle {
namespace imperative {

-void BasicEngine::Init(VarBase* var, bool retain_graph) {
+void BasicEngine::Init(VarBase* var, bool retain_graph, VarBase* grad_tensor) {
Contributor: grad_tensor could be given a default value of nullptr.

Contributor (author): The parameter already defaults to nullptr in the declaration.

retain_graph_ = retain_graph;
init_node_ = var->GradVarBase()->GradNode();
PADDLE_ENFORCE_EQ(var->GradVarBase()->GraphIsFreed(), false,
@@ -75,9 +75,15 @@ void BasicEngine::Init(VarBase* var, bool retain_graph) {
<< " as stop_gradient false";
var->GradVarBase()->InnerSetOverridedStopGradient(false);
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place());
-    grad_var->Resize(fwd_var.dims());
-    grad_var->mutable_data(fwd_var.place(), fwd_var.type());
-    operators::math::set_constant(*dev_ctx, grad_var, 1.0);
+    if (grad_tensor == nullptr) {
+      grad_var->Resize(fwd_var.dims());
+      grad_var->mutable_data(fwd_var.place(), fwd_var.type());
+      operators::math::set_constant(*dev_ctx, grad_var, 1.0);
+    } else {
+      paddle::framework::TensorCopy(
Member: Should we check here that the dimensions of grad_tensor match the dimensions of var?

+          grad_tensor->Var().Get<framework::LoDTensor>(), fwd_var.place(),
+          *dev_ctx, grad_var);
+    }
}

void BasicEngine::CheckBackwardInputs(const OpBase& op) {
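For reference, the engine-side change means: when grad_tensor is nullptr, Init() fills the starting gradient with ones; otherwise it copies the user-supplied tensor in as the starting gradient. A minimal sketch of those semantics from the Python side, assuming the dygraph API this PR exposes:

    import paddle

    x = paddle.to_tensor([3.0], stop_gradient=False)
    y = x * x  # dy/dx = 2x = 6

    # Default path: the starting gradient is filled with ones,
    # equivalent to passing paddle.ones_like(y) explicitly.
    y.backward()
    print(x.grad)  # [6.]

    x.clear_gradient()
    y = x * x
    # Custom path: the supplied tensor is copied in as the starting
    # gradient, scaling every downstream gradient by its values.
    y.backward(grad_tensor=paddle.to_tensor([2.0]))
    print(x.grad)  # [12.]
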
3 changes: 2 additions & 1 deletion paddle/fluid/imperative/basic_engine.h
@@ -30,7 +30,8 @@ class OpBase;

class BasicEngine : public Engine {
public:
-  void Init(VarBase* var, bool retain_graph = false);
+  void Init(VarBase* var, bool retain_graph = false,
+            VarBase* grad_tensor = nullptr);

void Execute() override;

4 changes: 2 additions & 2 deletions paddle/fluid/pybind/imperative.cc
@@ -920,11 +920,11 @@ void BindImperative(py::module *m_ptr) {
)DOC")
.def("_run_backward",
[](imperative::VarBase &self, const imperative::Tracer &tracer,
-           bool retain_graph) {
+           bool retain_graph, imperative::VarBase &grad_tensor) {
Contributor: The default argument needs to be handled here; add py::arg("grad_tensor") = nullptr at the end.

Contributor (author): Done.

// TODO(jiabin): when we impl more backward execution we can
// select them
auto *engine = tracer.GetEngine();
-             engine->Init(&self, retain_graph);
+             engine->Init(&self, retain_graph, &grad_tensor);
VLOG(3) << "Start backward";
engine->Execute();
VLOG(3) << "Finish backward";
24 changes: 22 additions & 2 deletions python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -133,7 +133,7 @@ def set_value(self, value):
framework._current_expected_place())

@framework.dygraph_only
-    def backward(self, retain_graph=False):
+    def backward(self, retain_graph=False, grad_tensor=None):
Contributor: Wouldn't it be more reasonable to put grad_tensor before retain_graph? It will be used more often, so it belongs first; moving it may introduce some compatibility risk, but it is more reasonable in the long run. It is worth checking how many tests in the framework currently use retain_graph.

Contributor (author): Done.

"""
Run backward of current Graph which starts from current Tensor.

@@ -147,6 +147,10 @@ def backward(self, retain_graph=False):
:code:`retain_graph` to True, then the grads will be retained. Thus, seting it to False is much more memory-efficient.
Defaults to False.

+            grad_tensor(Tensor, optional): initial gradient values of `outputs` . If `grad_tensor` is None,
Contributor: Since this is currently a Tensor API, wouldn't "initial gradient values of current Tensor" be easier to understand here than "initial gradient values of outputs"?

Contributor (author): Done.

+            the initial gradient values of `outputs` would be Tensor filled with 1;
Contributor: Same as above.

Contributor (author): Done.

+            if `grad_tensor` is not None, it must have the same length as `outputs`.
Contributor: Same as above.

Contributor (author): Done.

+            Default None.
Returns:
NoneType: None

@@ -168,6 +172,17 @@ def backward(self, retain_graph=False):
print("{}".format(x.grad))
# 0.

+                grad_tensor=paddle.to_tensor(2.)
+                for i in range(5):
+                    y = paddle.pow(x, 4.0)
+                    y.backward(grad_tensor=grad_tensor)
Contributor: If the argument is moved to the front, the keyword can be omitted here, which makes the call more concise.

Contributor (author): Done.

print("{}: {}".format(i, x.grad))
# 0: [1000.]
# 1: [2000.]
# 2: [3000.]
# 3: [4000.]
# 4: [5000.]

"""
if framework.in_dygraph_mode():
if paddle.is_compiled_with_xpu():
@@ -176,7 +191,12 @@ def backward(self, retain_graph=False):
scaled_loss._run_backward(framework._dygraph_tracer(),
retain_graph)
else:
-                self._run_backward(framework._dygraph_tracer(), retain_graph)
+                if grad_tensor is not None:
+                    assert grad_tensor.shape == self.shape, "Variable Shape not match, Variable of grad_tensor [ {} ] with shape {} mismatch Variable [ {} ] with shape {}".format(
Contributor: Change "Variable" to "Tensor" and "not match" to "does not match"; the sentence is ungrammatical and should be rephrased, using "Tensor" consistently instead of "Variable".

Contributor (author): Done.

+                        grad_tensor.name, grad_tensor.shape, self.name,
+                        self.shape)
+                self._run_backward(framework._dygraph_tracer(), retain_graph,
+                                   grad_tensor)
else:
raise ValueError(
"Variable.backward() is only available in DyGraph mode")
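With the new assert, a shape mismatch now fails fast on the Python side before the C++ engine is entered. A short sketch of that error path, assuming the API added above:

    import paddle

    x = paddle.to_tensor([1.0, 2.0], stop_gradient=False)
    y = x * 3  # y has shape [2]

    bad_grad = paddle.ones([3])  # shape [3] does not match y's shape [2]
    try:
        y.backward(grad_tensor=bad_grad)
    except AssertionError as e:
        # The assert in backward() rejects the mismatched shape
        # before _run_backward is ever invoked.
        print(e)
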
53 changes: 53 additions & 0 deletions python/paddle/fluid/tests/unittests/test_custom_grad_input.py
@@ -0,0 +1,53 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np

import paddle
import paddle.fluid.dygraph as dg
from op_test import OpTest


class TestBackward(unittest.TestCase):
def setUp(self):
self._dtypes = ["float32", "float64"]
self._places = [paddle.CPUPlace()]
if paddle.is_compiled_with_cuda():
self._places.append(paddle.CUDAPlace(0))

def test_all_positive(self):
for dtype in self._dtypes:
x = np.random.random([2, 100]).astype(dtype)
y = np.random.random([100, 2]).astype(dtype)
z = np.matmul(x, y)
grad = np.random.random(z.shape).astype(dtype)
for place in self._places:
with dg.guard(place):
x_tensor = paddle.to_tensor(x, stop_gradient=False)
y_tensor = paddle.to_tensor(y)
z_tensor = paddle.matmul(x_tensor, y_tensor)

grad_tensor = paddle.to_tensor(grad)
z_tensor.backward(grad_tensor=grad_tensor)

x_grad = np.matmul(grad, y.T)

self.assertTrue(np.allclose(x_grad, x_tensor.grad))


if __name__ == '__main__':
unittest.main()
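Note that the test only verifies the gradient of x; y_tensor keeps the default stop_gradient=True. A sketch of one more case that would also cover the gradient flowing to y (the method name and its body are hypothetical, not part of this PR):

    def test_gradient_for_second_input(self):
        # Hypothetical extra case: check the gradient w.r.t. y as well,
        # which requires stop_gradient=False on y_tensor.
        for dtype in self._dtypes:
            x = np.random.random([2, 100]).astype(dtype)
            y = np.random.random([100, 2]).astype(dtype)
            grad = np.random.random([2, 2]).astype(dtype)
            for place in self._places:
                with dg.guard(place):
                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
                    y_tensor = paddle.to_tensor(y, stop_gradient=False)
                    z_tensor = paddle.matmul(x_tensor, y_tensor)
                    z_tensor.backward(grad_tensor=paddle.to_tensor(grad))

                    # For z = x @ y, the gradient w.r.t. y is x^T @ grad.
                    y_grad = np.matmul(x.T, grad)
                    self.assertTrue(np.allclose(y_grad, y_tensor.grad))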