
Commit 961560d

fix english doc of lr_scheduler
1 parent 476f8e3 commit 961560d

File tree

1 file changed: +48 -88 lines

python/paddle/optimizer/lr_scheduler.py

Lines changed: 48 additions & 88 deletions
@@ -26,9 +26,8 @@
 
 class _LRScheduler(object):
     """
-    LRScheduler Base class.
 
-    Define the common interface of an learning rate scheduler.
+    LRScheduler Base class. Define the common interface of a learning rate scheduler.
 
 
     User can import it by ``form paddle.optimizer.lr_scheduler import _LRScheduler`` ,
@@ -97,6 +96,7 @@ def __call__(self):
 
     def step(self, epoch=None):
         """
+
         ``step`` should be called after ``optimizer.step`` . It will update the learning rate in optimizer according to current ``epoch`` .
         The new learning rate will take effect on next ``optimizer.step`` .
 
@@ -105,29 +105,7 @@ def step(self, epoch=None):
 
         Returns:
             None
-
-        Examples:
-            Please refer to the subclass of ``_LRScheduler`` (Base Class). ``StepLR`` is used as an example here.
-
-            .. code-block:: python
-                import paddle
-                import numpy as np
-
-                # train on default dynamic graph mode
-                paddle.disable_static()
-                x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-                linear = paddle.nn.Linear(10, 10)
-                scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
-                sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
-                for epoch in range(20):
-                    for batch_id in range(2):
-                        x = paddle.to_tensor(x)
-                        out = linear(x)
-                        loss = paddle.reduce_mean(out)
-                        loss.backward()
-                        sgd.step()
-                        sgd.clear_gradients()
-                    scheduler.step()
+
         """
         if epoch is None:
             self.last_epoch += 1
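A minimal sketch of the call order this docstring describes, reconstructed from the ``StepLR`` example that the hunk above removes (the scheduler settings are purely illustrative):

    import paddle

    linear = paddle.nn.Linear(10, 10)
    scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
    sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
    for epoch in range(20):
        for batch_id in range(2):
            x = paddle.uniform([10, 10])
            out = linear(x)
            loss = paddle.reduce_mean(out)
            loss.backward()
            sgd.step()             # update the parameters first ...
            sgd.clear_gradients()
        scheduler.step()           # ... then advance the learning rate once per epoch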
@@ -145,9 +123,10 @@ def step(self, epoch=None):
 
     def state_dict(self):
         """
+
         Returns the state of the scheduler as a :class:`dict`.
 
-        It is a subset of self.__dict__ .
+        It is a subset of ``self.__dict__`` .
         """
         self._state_keys()
         state_dict = {}
@@ -169,17 +148,19 @@ def state_dict(self):
     # (Note): you can change it for your subclass.
     def _state_keys(self):
         """
+
         For those subclass who overload ``_LRScheduler`` (Base Class). Acquiescently, "last_epoch, last_lr" will be saved by ``self.keys = ['last_epoch', 'last_lr']`` .
 
         ``last_epoch`` is the current epoch num, and ``last_lr`` is the current learning rate.
 
-        User can change the default behavior by redefining the dict ``self.keys`` .
+        If you want to change the default behavior, you should have a custom implementation of ``_state_keys()`` to redefine ``self.keys`` .
 
         """
         self.keys = ['last_epoch', 'last_lr']
 
     def set_state_dict(self, state_dict):
         """
+
         Loads the schedulers state.
         """
         self._state_keys()
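A minimal sketch of saving and restoring scheduler state with the two methods touched above; the ``StepLR`` settings are illustrative only:

    import paddle

    scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8)
    for _ in range(7):
        scheduler.step()

    state = scheduler.state_dict()   # by default a dict with 'last_epoch' and 'last_lr'

    resumed = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8)
    resumed.set_state_dict(state)    # resumes from epoch 7 with the saved learning rate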
@@ -200,6 +181,7 @@ def set_state_dict(self, state_dict):
 
     def get_lr(self):
         """
+
         For those subclass who overload ``_LRScheduler`` (Base Class), User should have a custom implementation of ``get_lr()`` .
 
         Otherwise, an ``NotImplementedError`` exception will be thrown.
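A hedged sketch of the subclassing contract described above: a custom scheduler provides its own ``get_lr()``. It assumes the base class stores the initial rate as ``self.base_lr`` and the epoch counter as ``self.last_epoch`` (the latter is confirmed by ``_state_keys`` above); the halving rule itself is invented for illustration:

    from paddle.optimizer.lr_scheduler import _LRScheduler

    class HalveEveryTenLR(_LRScheduler):
        """Illustrative scheduler: halve the learning rate every 10 epochs."""

        def get_lr(self):
            # assumes self.base_lr / self.last_epoch are provided by the base class
            return self.base_lr * (0.5 ** (self.last_epoch // 10))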
@@ -239,14 +221,12 @@ class NoamLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.NoamLR(d_model=0.01, warmup_steps=100, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -337,14 +317,12 @@ class PiecewiseLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.PiecewiseLR(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -421,14 +399,12 @@ class NaturalExpLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.NaturalExpLR(learning_rate=0.5, gamma=0.1, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -500,14 +476,12 @@ class InverseTimeLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.InverseTimeLR(learning_rate=0.5, gamma=0.1, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -596,14 +570,12 @@ class PolynomialLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.PolynomialLR(learning_rate=0.5, decay_steps=20, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -678,15 +650,15 @@ class LinearLrWarmup(_LRScheduler):
 
     When epoch < warmup_steps, learning rate is updated as:
 
-    .. code-block:: text
+    .. math::
 
-            lr = start_lr + (end_lr - start_lr) * (epoch / warmup_steps)
+            lr = start\_lr + (end\_lr - start\_lr) * \\frac{epoch}{warmup\_steps}
 
     where start_lr is the initial learning rate, and end_lr is the final learning rate;
 
     When epoch >= warmup_steps, learning rate is updated as:
 
-    .. code-block:: text
+    .. math::
 
             lr = learning_rate
 
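A worked instance of the warmup formula above, using the illustrative values from the example in the next hunk (start_lr=0, end_lr=0.5, warmup_steps=20, learning_rate=0.5):

    start_lr, end_lr, warmup_steps, learning_rate = 0.0, 0.5, 20, 0.5

    def warmup_lr(epoch):
        # linear warmup followed by the constant base rate, as in the formula above
        if epoch < warmup_steps:
            return start_lr + (end_lr - start_lr) * (epoch / warmup_steps)
        return learning_rate

    print(warmup_lr(0))    # 0.0  (start of warmup)
    print(warmup_lr(10))   # 0.25 (halfway through warmup)
    print(warmup_lr(20))   # 0.5  (warmup finished, constant afterwards)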
@@ -711,15 +683,13 @@ class LinearLrWarmup(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.LinearLrWarmup(
                 learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -791,7 +761,7 @@ def get_lr(self):
 
 class ExponentialLR(_LRScheduler):
     """
-    Update learning rate by 'gamma' each epoch.
+    Update learning rate by `gamma` each epoch.
 
     The algorithm can be described as following.
 
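An illustrative decay curve under the usual reading of "update learning rate by `gamma` each epoch", i.e. lr_t = learning_rate * gamma ** epoch; the closed form is not part of this hunk, so treat it as an assumption. The values follow the example below (learning_rate=0.5, gamma=0.9):

    # assumed closed form: lr at epoch t = learning_rate * gamma ** t
    learning_rate, gamma = 0.5, 0.9
    for epoch in range(4):
        print(epoch, learning_rate * gamma ** epoch)   # 0.5, 0.45, 0.405, 0.3645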
@@ -817,14 +787,12 @@ class ExponentialLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.ExponentialLR(learning_rate=0.5, gamma=0.9, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -905,14 +873,12 @@ class MultiStepLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.MultiStepLR(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -1014,14 +980,12 @@ class StepLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -1089,9 +1053,9 @@ class LambdaLR(_LRScheduler):
         learning_rate = 0.5        # init learning_rate
         lr_lambda = lambda epoch: 0.95 ** epoch
 
-        learning_rate = 0.5        # epoch 0
-        learning_rate = 0.475      # epoch 1
-        learning_rate = 0.45125    # epoch 2
+        learning_rate = 0.5        # epoch 0, 0.5*0.95**0
+        learning_rate = 0.475      # epoch 1, 0.5*0.95**1
+        learning_rate = 0.45125    # epoch 2, 0.5*0.95**2
 
     Args:
         learning_rate (float): The initial learning rate. It is a python float number.
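A quick check of the three rates listed above: ``LambdaLR`` scales the initial rate by ``lr_lambda(epoch)``:

    # reproduces the commented values in the hunk above
    learning_rate = 0.5
    lr_lambda = lambda epoch: 0.95 ** epoch
    print([learning_rate * lr_lambda(e) for e in range(3)])   # [0.5, 0.475, 0.45125]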
@@ -1110,14 +1074,12 @@ class LambdaLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.LambdaLR(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -1210,14 +1172,12 @@ class ReduceLROnPlateau(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.ReduceLROnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -1312,7 +1272,7 @@ def _state_keys(self):
 
     def step(self, metrics, epoch=None):
         """
-        step should be called after 'minimize' . It will update the learning rate in optimizer according to ``metrics`` .
+        step should be called after `optimizer.step()` . It will update the learning rate in optimizer according to ``metrics`` .
         The new learning rate will take effect on next epoch.
 
         Args:
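A minimal sketch of the call order for ``ReduceLROnPlateau`` described above: unlike the other schedulers, ``step`` takes the monitored metric. The settings mirror the example earlier in this file and are illustrative only:

    import paddle

    linear = paddle.nn.Linear(10, 10)
    scheduler = paddle.optimizer.lr_scheduler.ReduceLROnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
    sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
    for epoch in range(20):
        x = paddle.uniform([10, 10])
        loss = paddle.reduce_mean(linear(x))
        loss.backward()
        sgd.step()
        sgd.clear_gradients()
        scheduler.step(loss)   # pass the monitored metric; lr is reduced once it stops improving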
@@ -1387,12 +1347,13 @@ class CosineAnnealingLR(_LRScheduler):
     The algorithm can be described as following.
 
     .. math::
-        \begin{aligned}
-            \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1
-            + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right),
-            & T_{cur} \neq (2k+1)T_{max}; \\
-            \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min})
-            \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right),
+
+        \\begin{aligned}
+            \eta_t & = \eta_{min} + \\frac{1}{2}(\eta_{max} - \eta_{min})\left(1
+            + \cos\left(\\frac{T_{cur}}{T_{max}}\pi\\right)\\right),
+            & T_{cur} \\neq (2k+1)T_{max}; \\
+            \eta_{t+1} & = \eta_{t} + \\frac{1}{2}(\eta_{max} - \eta_{min})
+            \left(1 - \cos\left(\\frac{1}{T_{max}}\pi\\right)\\right),
             & T_{cur} = (2k+1)T_{max}.
         \end{aligned}
 
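A numeric sketch of the closed-form curve that the recurrence above is equivalent to, eta_t = eta_min + (eta_max - eta_min)/2 * (1 + cos(pi * T_cur / T_max)), assuming eta_min = 0 and taking eta_max = 0.5, T_max = 10 from the example in the next hunk:

    import math

    # assumed values: eta_min defaults to 0, eta_max and T_max follow the example below
    eta_min, eta_max, T_max = 0.0, 0.5, 10
    for T_cur in (0, 5, 10):
        eta = eta_min + 0.5 * (eta_max - eta_min) * (1 + math.cos(math.pi * T_cur / T_max))
        print(T_cur, eta)   # 0.5 at the start, 0.25 halfway, 0.0 at T_max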
@@ -1417,13 +1378,12 @@ class CosineAnnealingLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.CosineAnnealingLR(learning_rate=0.5, T_max=10, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
