 
 class _LRScheduler(object):
     """
-    LRScheduler Base class.
 
-    Define the common interface of an learning rate scheduler.
+    LRScheduler Base class. Define the common interface of a learning rate scheduler.
 
     User can import it by ``form paddle.optimizer.lr_scheduler import _LRScheduler`` ,
 
@@ -97,6 +96,7 @@ def __call__(self):
 
     def step(self, epoch=None):
         """
+
         ``step`` should be called after ``optimizer.step`` . It will update the learning rate in optimizer according to current ``epoch`` .
         The new learning rate will take effect on next ``optimizer.step`` .
 
@@ -105,29 +105,7 @@ def step(self, epoch=None):
 
         Returns:
             None
-
-        Examples:
-            Please refer to the subclass of ``_LRScheduler`` (Base Class). ``StepLR`` is used as an example here.
-
-            .. code-block:: python
-                import paddle
-                import numpy as np
-
-                # train on default dynamic graph mode
-                paddle.disable_static()
-                x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-                linear = paddle.nn.Linear(10, 10)
-                scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
-                sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
-                for epoch in range(20):
-                    for batch_id in range(2):
-                        x = paddle.to_tensor(x)
-                        out = linear(x)
-                        loss = paddle.reduce_mean(out)
-                        loss.backward()
-                        sgd.step()
-                        sgd.clear_gradients()
-                    scheduler.step()
+
         """
         if epoch is None:
             self.last_epoch += 1
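For quick reference, the calling pattern described above looks like the following sketch (it mirrors the ``StepLR`` example used elsewhere in this diff): the optimizer is stepped once per batch, the scheduler once per epoch, after ``optimizer.step``.

.. code-block:: python

    import paddle

    # Sketch mirroring the examples in this diff: optimizer.step() per batch,
    # scheduler.step() once per epoch; the new lr applies to the next optimizer.step().
    linear = paddle.nn.Linear(10, 10)
    scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8)
    sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
    for epoch in range(20):
        for batch_id in range(2):
            x = paddle.uniform([10, 10])
            loss = paddle.reduce_mean(linear(x))
            loss.backward()
            sgd.step()
            sgd.clear_gradients()
        scheduler.step()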
@@ -145,9 +123,10 @@ def step(self, epoch=None):
 
     def state_dict(self):
         """
+
         Returns the state of the scheduler as a :class:`dict`.
 
-        It is a subset of self.__dict__ .
+        It is a subset of ``self.__dict__`` .
         """
         self._state_keys()
         state_dict = {}
@@ -169,17 +148,19 @@ def state_dict(self):
     # (Note): you can change it for your subclass.
     def _state_keys(self):
         """
+
         For those subclass who overload ``_LRScheduler`` (Base Class). Acquiescently, "last_epoch, last_lr" will be saved by ``self.keys = ['last_epoch', 'last_lr']`` .
 
         ``last_epoch`` is the current epoch num, and ``last_lr`` is the current learning rate.
 
-        User can change the default behavior by redefining the dict ``self.keys`` .
+        If you want to change the default behavior, you should have a custom implementation of ``_state_keys()`` to redefine ``self.keys`` .
 
         """
         self.keys = ['last_epoch', 'last_lr']
 
     def set_state_dict(self, state_dict):
         """
+
         Loads the schedulers state.
         """
         self._state_keys()
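A minimal save-and-restore sketch of the ``state_dict`` / ``set_state_dict`` pair above, assuming the default keys listed in ``_state_keys`` (``last_epoch`` and ``last_lr``):

.. code-block:: python

    import paddle

    scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8)
    state = scheduler.state_dict()      # a plain dict, e.g. {'last_epoch': 0, 'last_lr': 0.5}
    # ... save `state` as part of a checkpoint, reload it later ...
    scheduler.set_state_dict(state)     # restores last_epoch / last_lr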
@@ -200,6 +181,7 @@ def set_state_dict(self, state_dict):
 
     def get_lr(self):
         """
+
         For those subclass who overload ``_LRScheduler`` (Base Class), User should have a custom implementation of ``get_lr()`` .
 
         Otherwise, an ``NotImplementedError`` exception will be thrown.
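A bare-bones sketch of such an override (the subclass name is hypothetical, and ``self.base_lr`` is assumed to be the attribute holding the initial learning rate):

.. code-block:: python

    # Hypothetical subclass: halve the learning rate every epoch.
    class HalvingLR(_LRScheduler):
        def get_lr(self):
            # self.base_lr is assumed to be set by _LRScheduler.__init__
            return self.base_lr * (0.5 ** self.last_epoch)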
@@ -239,14 +221,12 @@ class NoamLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.NoamLR(d_model=0.01, warmup_steps=100, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -337,14 +317,12 @@ class PiecewiseLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.PiecewiseLR(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -421,14 +399,12 @@ class NaturalExpLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.NaturalExpLR(learning_rate=0.5, gamma=0.1, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -500,14 +476,12 @@ class InverseTimeLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.InverseTimeLR(learning_rate=0.5, gamma=0.1, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -596,14 +570,12 @@ class PolynomialLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.PolynomialLR(learning_rate=0.5, decay_steps=20, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -678,15 +650,15 @@ class LinearLrWarmup(_LRScheduler):
 
     When epoch < warmup_steps, learning rate is updated as:
 
-    .. code-block:: text
+    .. math::
 
-        lr = start_lr + (end_lr - start_lr) * (epoch / warmup_steps)
+        lr = start\_lr + (end\_lr - start\_lr) * \\frac{epoch}{warmup\_steps}
 
     where start_lr is the initial learning rate, and end_lr is the final learning rate;
 
     When epoch >= warmup_steps, learning rate is updated as:
 
-    .. code-block:: text
+    .. math::
 
         lr = learning_rate
 
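A plain-Python restatement of the two cases above (a sketch; the names follow the docstring):

.. code-block:: python

    def linear_warmup_lr(epoch, warmup_steps, start_lr, end_lr, learning_rate):
        # Linear ramp from start_lr to end_lr during warmup, then the scheduled lr.
        if epoch < warmup_steps:
            return start_lr + (end_lr - start_lr) * epoch / warmup_steps
        return learning_rate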
@@ -711,15 +683,13 @@ class LinearLrWarmup(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.LinearLrWarmup(
                 learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -791,7 +761,7 @@ def get_lr(self):
 class ExponentialLR(_LRScheduler):
     """
 
-    Update learning rate by 'gamma' each epoch.
+    Update learning rate by `gamma` each epoch.
 
     The algorithm can be described as following.
 
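The math block itself falls outside this hunk; as a rough sketch it is assumed to be the usual exponential decay, ``new_lr = learning_rate * gamma ** epoch``:

.. code-block:: python

    # Assumed exponential decay; gamma should be in (0, 1) for the lr to shrink.
    def exponential_lr(epoch, learning_rate, gamma):
        return learning_rate * gamma ** epoch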
@@ -817,14 +787,12 @@ class ExponentialLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.ExponentialLR(learning_rate=0.5, gamma=0.9, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -905,14 +873,12 @@ class MultiStepLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.MultiStepLR(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -1014,14 +980,12 @@ class StepLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -1089,9 +1053,9 @@ class LambdaLR(_LRScheduler):
         learning_rate = 0.5        # init learning_rate
         lr_lambda = lambda epoch: 0.95 ** epoch
 
-        learning_rate = 0.5        # epoch 0
-        learning_rate = 0.475      # epoch 1
-        learning_rate = 0.45125    # epoch 2
+        learning_rate = 0.5        # epoch 0, 0.5*0.95**0
+        learning_rate = 0.475      # epoch 1, 0.5*0.95**1
+        learning_rate = 0.45125    # epoch 2, 0.5*0.95**2
 
     Args:
         learning_rate (float): The initial learning rate. It is a python float number.
@@ -1110,14 +1074,12 @@ class LambdaLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.LambdaLR(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
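The ``LambdaLR`` progression listed in the hunk above (0.5, 0.475, 0.45125) follows ``lr(epoch) = learning_rate * lr_lambda(epoch)``; a quick check:

.. code-block:: python

    learning_rate = 0.5
    lr_lambda = lambda epoch: 0.95 ** epoch
    print([learning_rate * lr_lambda(epoch) for epoch in range(3)])
    # [0.5, 0.475, 0.45125]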
@@ -1210,14 +1172,12 @@ class ReduceLROnPlateau(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            paddle.disable_static()
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.ReduceLROnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()
@@ -1312,7 +1272,7 @@ def _state_keys(self):
 
     def step(self, metrics, epoch=None):
         """
-        step should be called after 'minimize' . It will update the learning rate in optimizer according to ``metrics`` .
+        step should be called after `optimizer.step()` . It will update the learning rate in optimizer according to ``metrics`` .
         The new learning rate will take effect on next epoch.
 
         Args:
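Unlike the other schedulers, ``ReduceLROnPlateau.step`` receives the monitored metric; a minimal sketch mirroring the example earlier in this diff:

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    scheduler = paddle.optimizer.lr_scheduler.ReduceLROnPlateau(learning_rate=1.0, factor=0.5, patience=5)
    sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
    for epoch in range(20):
        x = paddle.uniform([10, 10])
        loss = paddle.reduce_mean(linear(x))
        loss.backward()
        sgd.step()
        sgd.clear_gradients()
        scheduler.step(loss)    # lr is reduced after `patience` epochs without improvement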
@@ -1387,12 +1347,13 @@ class CosineAnnealingLR(_LRScheduler):
     The algorithm can be described as following.
 
     .. math::
-        \begin{aligned}
-        \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1
-        + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right),
-        & T_{cur} \neq (2k+1)T_{max}; \\
-        \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min})
-        \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right),
+
+        \\begin{aligned}
+        \eta_t & = \eta_{min} + \\frac{1}{2}(\eta_{max} - \eta_{min})\left(1
+        + \cos\left(\\frac{T_{cur}}{T_{max}}\pi\\right)\\right),
+        & T_{cur} \\neq (2k+1)T_{max}; \\
+        \eta_{t+1} & = \eta_{t} + \\frac{1}{2}(\eta_{max} - \eta_{min})
+        \left(1 - \cos\left(\\frac{1}{T_{max}}\pi\\right)\\right),
         & T_{cur} = (2k+1)T_{max}.
         \end{aligned}
 
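For the common branch (``T_cur != (2k+1)T_max``) the schedule has the closed form shown above; a small numeric sketch, assuming ``eta_min = 0`` (as when only ``learning_rate`` and ``T_max`` are given):

.. code-block:: python

    import math

    def cosine_annealing_lr(epoch, T_max, eta_max, eta_min=0.0):
        # eta_min + 1/2 * (eta_max - eta_min) * (1 + cos(pi * T_cur / T_max))
        return eta_min + 0.5 * (eta_max - eta_min) * (1 + math.cos(math.pi * epoch / T_max))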
@@ -1417,13 +1378,12 @@ class CosineAnnealingLR(_LRScheduler):
             import numpy as np
 
             # train on default dynamic graph mode
-            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
             linear = paddle.nn.Linear(10, 10)
             scheduler = paddle.optimizer.lr_scheduler.CosineAnnealingLR(learning_rate=0.5, T_max=10, verbose=True)
-            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
             for epoch in range(20):
                 for batch_id in range(2):
-                    x = paddle.to_tensor(x)
+                    x = paddle.uniform([10, 10])
                     out = linear(x)
                     loss = paddle.reduce_mean(out)
                     loss.backward()