Skip to content

Commit 83b953f

Browse files
add custom init grad for backward function (#31540)
* add custom init grad for backward function * add custom init grad for backward function * handle when the grad_tensor is none * handle when the grad_tensor is none * fix the args type error on windows platform * modify the args order and doc * format code * add grad_tensor to xpu * modify the grad_tensor type check * add paddle.backward api to support multi tensors gradient compute * add paddle.backward api to support multi tensors gradient compute * add paddle.autograd module and backward api * change tensor.backward func args * modify tensor backward api * remove create_graph inputs args * add doc and example code for backward api * when have the same tensor, throw error * modify test Init func args * modify the execute.Init func args in test files * add paddle.autograd package in setup.py.in * modify error msg, remove _run_backward method in class Tensor * add test cases for backward api
1 parent 9c5d028 commit 83b953f

12 files changed

Lines changed: 397 additions & 66 deletions

File tree

paddle/fluid/imperative/basic_engine.cc

Lines changed: 72 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -36,48 +36,73 @@ DECLARE_bool(sort_sum_gradient);
3636
namespace paddle {
3737
namespace imperative {
3838

39-
void BasicEngine::Init(VarBase* var, bool retain_graph) {
39+
void BasicEngine::Init(
40+
const std::vector<std::shared_ptr<VarBase>>& tensors,
41+
const std::vector<std::shared_ptr<VarBase>>& grad_tensors,
42+
bool retain_graph) {
4043
retain_graph_ = retain_graph;
41-
init_node_ = var->GradVarBase()->GradNode();
42-
PADDLE_ENFORCE_EQ(var->GradVarBase()->GraphIsFreed(), false,
43-
platform::errors::Unavailable(
44-
"%s trying to backward through the same graph a second "
45-
"time, but this graph have already been freed. Please "
46-
"specify Tensor.backward(retain_graph=True) when "
47-
"calling backward at the first time.",
48-
var->Name()));
49-
50-
if (!retain_graph) {
51-
VLOG(5) << "Clear the auto-grad graph from grad var " << var->Name()
52-
<< " because of retain_graph=False when calling backward";
53-
var->GradVarBase()->SetGraphIsFreed(true);
54-
var->GradVarBase()->ClearGradNode();
55-
}
5644

57-
if (init_node_ == nullptr || var->OverridedStopGradient()) {
58-
VLOG(3) << "Skip auto grad since there is no grad op for var or loss is "
59-
"stop_gradient=True: "
60-
<< var->Name();
61-
return;
62-
}
45+
PADDLE_ENFORCE_EQ(
46+
tensors.size(), grad_tensors.size(),
47+
platform::errors::Unavailable(
48+
"The size of tensors do not equal the size of grad_tensors,"
49+
"the size of tensors is %s, but the size of grad_tensors is %s.",
50+
tensors.size(), grad_tensors.size()));
51+
52+
for (size_t i = 0; i < tensors.size(); ++i) {
53+
auto var = tensors[i];
54+
auto grad_tensor = grad_tensors[i];
55+
56+
auto init_node = var->GradVarBase()->GradNode();
57+
PADDLE_ENFORCE_EQ(
58+
var->GradVarBase()->GraphIsFreed(), false,
59+
platform::errors::Unavailable(
60+
"%s trying to backward through the same graph a second "
61+
"time, but this graph have already been freed. Please "
62+
"specify Tensor.backward(retain_graph=True) when "
63+
"calling backward at the first time.",
64+
var->Name()));
65+
66+
if (!retain_graph) {
67+
VLOG(5) << "Clear the auto-grad graph from grad var " << var->Name()
68+
<< " because of retain_graph=False when calling backward";
69+
var->GradVarBase()->SetGraphIsFreed(true);
70+
var->GradVarBase()->ClearGradNode();
71+
}
6372

64-
VLOG(3) << "Init first node of backward";
73+
if (init_node == nullptr || var->OverridedStopGradient()) {
74+
VLOG(3) << "Skip auto grad since there is no grad op for var or loss is "
75+
"stop_gradient=True: "
76+
<< var->Name();
77+
continue;
78+
}
6579

66-
PADDLE_ENFORCE_EQ(
67-
var->HasGradVar(), true,
68-
platform::errors::NotFound("Grad variable not exist for variable %s",
69-
var->Name()));
70-
71-
auto& fwd_var = var->Var().Get<framework::LoDTensor>();
72-
auto* grad_var =
73-
var->GradVarBase()->MutableVar()->GetMutable<framework::LoDTensor>();
74-
VLOG(6) << "init loss grad:" << var->GradVarBase()->Name()
75-
<< " as stop_gradient false";
76-
var->GradVarBase()->InnerSetOverridedStopGradient(false);
77-
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place());
78-
grad_var->Resize(fwd_var.dims());
79-
grad_var->mutable_data(fwd_var.place(), fwd_var.type());
80-
operators::math::set_constant(*dev_ctx, grad_var, 1.0);
80+
VLOG(3) << "Init node of backward";
81+
82+
PADDLE_ENFORCE_EQ(
83+
var->HasGradVar(), true,
84+
platform::errors::NotFound("Tensor %s has no gradient", var->Name()));
85+
86+
auto& fwd_var = var->Var().Get<framework::LoDTensor>();
87+
auto* grad_var =
88+
var->GradVarBase()->MutableVar()->GetMutable<framework::LoDTensor>();
89+
VLOG(6) << "init loss grad:" << var->GradVarBase()->Name()
90+
<< " as stop_gradient false";
91+
var->GradVarBase()->InnerSetOverridedStopGradient(false);
92+
auto* dev_ctx =
93+
platform::DeviceContextPool::Instance().Get(fwd_var.place());
94+
if (grad_tensor == nullptr) {
95+
grad_var->Resize(fwd_var.dims());
96+
grad_var->mutable_data(fwd_var.place(), fwd_var.type());
97+
operators::math::set_constant(*dev_ctx, grad_var, 1.0);
98+
} else {
99+
paddle::framework::TensorCopy(
100+
grad_tensor->Var().Get<framework::LoDTensor>(), fwd_var.place(),
101+
*dev_ctx, grad_var);
102+
}
103+
104+
init_nodes_.push_back(init_node);
105+
}
81106
}
82107

83108
void BasicEngine::CheckBackwardInputs(const OpBase& op) {
@@ -224,8 +249,10 @@ void BasicEngine::PrepareDeps() {
224249
std::queue<GradOpNode*> q;
225250
std::unordered_set<GradOpNode*> visited;
226251

227-
q.push(init_node_.get());
228-
visited.insert(init_node_.get());
252+
for (size_t i = 0; i < init_nodes_.size(); ++i) {
253+
q.push(init_nodes_[i].get());
254+
visited.insert(init_nodes_[i].get());
255+
}
229256

230257
while (!q.empty()) {
231258
auto* cur_node = q.front();
@@ -276,14 +303,16 @@ static std::shared_ptr<NameVarMap<VariableWrapper>> CallGradientHooks(
276303
}
277304

278305
void BasicEngine::Execute() {
279-
if (init_node_ == nullptr) {
306+
if (init_nodes_.empty()) {
280307
return;
281308
}
282309

283310
PrepareDeps();
284311
// Start execute Computation graph
285312
std::queue<std::shared_ptr<GradOpNode>> q;
286-
q.push(std::move(init_node_));
313+
for (size_t i = 0; i < init_nodes_.size(); ++i) {
314+
q.push(std::move(init_nodes_[i]));
315+
}
287316

288317
size_t op_num = 0;
289318

@@ -505,7 +534,7 @@ void BasicEngine::Execute() {
505534
}
506535

507536
void BasicEngine::Clear() {
508-
init_node_.reset();
537+
init_nodes_.clear();
509538
node_deps_.clear();
510539
accumulators_.clear();
511540
accumulators_with_grad_node_.clear();

paddle/fluid/imperative/basic_engine.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ class OpBase;
3030

3131
class BasicEngine : public Engine {
3232
public:
33-
void Init(VarBase* var, bool retain_graph = false);
33+
void Init(const std::vector<std::shared_ptr<VarBase>>& tensors,
34+
const std::vector<std::shared_ptr<VarBase>>& grad_tensors,
35+
bool retain_graph = false);
3436

3537
void Execute() override;
3638

@@ -46,7 +48,7 @@ class BasicEngine : public Engine {
4648
void Clear();
4749

4850
private:
49-
std::shared_ptr<GradOpNode> init_node_;
51+
std::vector<std::shared_ptr<GradOpNode>> init_nodes_;
5052
std::unordered_map<GradOpNode*, size_t> node_deps_;
5153
// The input and output of Inplace op are the same. If only `var` is used
5254
// as the key, then the input and output of inplace op must be gradient

paddle/fluid/imperative/tests/test_hooks.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,10 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) {
9292
ASSERT_EQ(out->GradVarBase()->GradOpNum(), 1UL);
9393

9494
// 3. backward
95+
std::vector<std::shared_ptr<imperative::VarBase>> tensors{out};
96+
std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
9597
BasicEngine engine;
96-
engine.Init(out.get());
98+
engine.Init(tensors, grad_tensors);
9799
engine.Execute();
98100

99101
framework::LoDTensor x_grad;
@@ -191,8 +193,10 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() {
191193
ASSERT_EQ(out->GradVarBase()->GradOpNum(), 1UL);
192194

193195
// 3. backward
196+
std::vector<std::shared_ptr<imperative::VarBase>> tensors{out};
197+
std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
194198
BasicEngine engine;
195-
engine.Init(out.get());
199+
engine.Init(tensors, grad_tensors);
196200
engine.Execute();
197201

198202
framework::LoDTensor x_grad;

paddle/fluid/imperative/tests/test_tracer.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,10 @@ TEST(test_tracer, test_trace_op_with_multi_device_inputs) {
250250
tracer.TraceOp("reduce_sum", reduce_in, reduce_out, reduce_attr_map,
251251
gpu_place, true);
252252
imperative::BasicEngine engine;
253-
engine.Init(reduce_sum_out.get());
253+
254+
std::vector<std::shared_ptr<imperative::VarBase>> tensors{reduce_sum_out};
255+
std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
256+
engine.Init(tensors, grad_tensors);
254257
engine.Execute();
255258

256259
framework::LoDTensor rlt;
@@ -376,8 +379,10 @@ TEST(test_tracer, test_var_without_grad_var) {
376379
ASSERT_EQ(y_in->GradVarBase()->GradOpNum(), 0UL);
377380
ASSERT_EQ(vout->GradVarBase()->GradOpNum(), 1UL);
378381

382+
std::vector<std::shared_ptr<imperative::VarBase>> tensors{vout};
383+
std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
379384
imperative::BasicEngine engine;
380-
engine.Init(vout.get());
385+
engine.Init(tensors, grad_tensors);
381386
engine.Execute();
382387

383388
// check the grad

paddle/fluid/pybind/imperative.cc

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -805,6 +805,7 @@ void BindImperative(py::module *m_ptr) {
805805
Bump the version whenever the Tensor is modified through an inplace operation.
806806
)DOC")
807807
.def("numpy",
808+
808809
[](imperative::VarBase &self) -> py::array {
809810
const auto &tensor =
810811
self.MutableVar()->Get<framework::LoDTensor>();
@@ -1003,18 +1004,6 @@ void BindImperative(py::module *m_ptr) {
10031004
print(x.stop_gradient) # True
10041005
print(x.grad) # None
10051006
)DOC")
1006-
.def("_run_backward",
1007-
[](imperative::VarBase &self, const imperative::Tracer &tracer,
1008-
bool retain_graph) {
1009-
// TODO(jiabin): when we impl more backward execution we can
1010-
// select them
1011-
auto *engine = tracer.GetEngine();
1012-
engine->Init(&self, retain_graph);
1013-
VLOG(3) << "Start backward";
1014-
engine->Execute();
1015-
VLOG(3) << "Finish backward";
1016-
},
1017-
py::call_guard<py::gil_scoped_release>())
10181007
.def("_grad_name", &imperative::VarBase::GradVarName)
10191008
.def("_grad_value",
10201009
[](imperative::VarBase &self) {
@@ -1549,6 +1538,19 @@ void BindImperative(py::module *m_ptr) {
15491538
},
15501539
py::call_guard<py::gil_scoped_release>());
15511540

1541+
m.def(
1542+
"dygraph_run_backward",
1543+
[](const std::vector<std::shared_ptr<imperative::VarBase>> &tensors,
1544+
const std::vector<std::shared_ptr<imperative::VarBase>> &grad_tensors,
1545+
bool retain_graph, const imperative::Tracer &tracer) {
1546+
auto *engine = tracer.GetEngine();
1547+
engine->Init(tensors, grad_tensors, retain_graph);
1548+
VLOG(3) << "Start backward";
1549+
engine->Execute();
1550+
VLOG(3) << "Finish backward";
1551+
},
1552+
py::call_guard<py::gil_scoped_release>());
1553+
15521554
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
15531555
defined(PADDLE_WITH_XPU_BKCL)
15541556
py::class_<imperative::ParallelContext,

python/paddle/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import paddle.device
4545
import paddle.regularizer
4646
import paddle.incubate
47+
import paddle.autograd
4748

4849
# TODO: define alias in tensor and framework directory
4950

python/paddle/autograd/__init__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from ..fluid.dygraph.base import grad #DEFINE_ALIAS
16+
17+
from . import backward_mode
18+
from .backward_mode import backward
19+
20+
__all__ = ['grad']
21+
22+
__all__ += backward_mode.__all__

0 commit comments

Comments
 (0)