
Commit a3bc3bc

Fix scaled_params append error in AdamW. (#28633)
Fix no_grad setting in AdamW. test=develop
Parent: c4d22c8

File tree

1 file changed (+6 -3 lines)

python/paddle/optimizer/adamw.py

Lines changed: 6 additions & 3 deletions
@@ -15,6 +15,7 @@
 from .optimizer import Optimizer
 from .adam import Adam
 from ..fluid import framework
+from ..fluid.dygraph import base as imperative_base
 import paddle
 from paddle.fluid.dygraph.parallel import apply_collective_grads

@@ -171,13 +172,14 @@ def _scale_parameters(self, params_and_grads):
                 learning_rate = self._learning_rate()
             with param.block.program._optimized_guard(
                     [param, grad]), framework.name_scope('weight decay'):
+                scaled_params.append(
+                    (param, grad, param * self._coeff * learning_rate))
                 if param.name not in self._params_name:
-                    scaled_params.append(
-                        (param, grad, param * self._coeff * learning_rate))
                     self._params_name.add(param.name)
                     param = param * self._coeff
         return scaled_params
 
+    @imperative_base.no_grad
     def minimize(self,
                  loss,
                  startup_program=None,
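The hunk above is the "scaled_params append error" named in the commit title: before this change, the decay tuple was appended only inside the `if param.name not in self._params_name:` guard, so the decoupled weight-decay term was built once per parameter instead of once per step. The fix moves the append ahead of the guard and leaves the guard as one-time bookkeeping. A minimal sketch of the intended per-step behavior, using plain Python floats and hypothetical values (not the Paddle source):

```python
# Decoupled weight decay as the fixed _scale_parameters intends it:
# a fresh decay term param * coeff * lr is produced on EVERY step,
# while the seen-set only records first sightings.
lr, coeff = 0.1, 0.01
param, seen = 2.0, set()
for step in range(3):
    decay = param * coeff * lr      # third element of the appended tuple
    if "w" not in seen:             # one-time bookkeeping, as in the diff
        seen.add("w")
    param -= decay                  # mirrors elementwise_sub in step()
    print(step, round(param, 6))    # 1.998, 1.996002, 1.994006
```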
@@ -207,6 +209,7 @@ def minimize(self,
         return optimize_ops, params_grads
 
     @framework.dygraph_only
+    @imperative_base.no_grad
     def step(self):
         if paddle.distributed.get_world_size() > 1:
             apply_collective_grads(self._parameter_list)
@@ -227,7 +230,7 @@ def step(self):
                     [param, grad]), framework.name_scope('weight decay'):
                 updated_param = paddle.fluid.layers.elementwise_sub(
                     x=param, y=scaled_param)
-                param.set_value(updated_param.numpy())
+                paddle.fluid.layers.assign(input=updated_param, output=param)
         self._apply_optimize(
             loss=None, startup_program=None, params_grads=params_grads)
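The last hunk replaces `param.set_value(updated_param.numpy())`, which round-trips the updated tensor through host memory as a numpy array, with `fluid.layers.assign`, a tensor-to-tensor copy that stays on device. A hypothetical smoke test for the fixed dygraph path (Paddle 2.0-style API assumed; the model, shapes, and hyperparameters are illustrative only):

```python
import paddle

# After this commit, AdamW.step() should apply decoupled weight decay
# on every call, under no_grad, via assign instead of a numpy round trip.
linear = paddle.nn.Linear(2, 2)
opt = paddle.optimizer.AdamW(
    learning_rate=0.1,
    parameters=linear.parameters(),
    weight_decay=0.01)

for _ in range(3):                            # decay fires each step, not once
    loss = linear(paddle.rand([4, 2])).mean()
    loss.backward()
    opt.step()                                # the dygraph-only path patched above
    opt.clear_grad()
```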
