Skip to content

Commit 8568734

Browse files
authored
[AMP] add get() and set() for Grad_scaler (#33835)
* add get and set for Grad_scaler * refine some API name and comments * refine API name and comments * refine some comments
1 parent 54af52b commit 8568734

3 files changed

Lines changed: 427 additions & 0 deletions

File tree

python/paddle/amp/grad_scaler.py

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,290 @@ def minimize(self, optimizer, *args, **kwargs):
145145
optimizer.clear_grad()
146146
"""
147147
return super(GradScaler, self).minimize(optimizer, *args, **kwargs)
148+
149+
def is_enable(self):
    """
    Report whether loss scaling is enabled for this scaler.

    The answer is obtained from the parent class implementation.

    Returns:
        bool: True when loss scaling is enabled, otherwise False.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            enable = scaler.is_enable()
            print(enable) # True
    """
    enabled = super(GradScaler, self).is_enable()
    return enabled
171+
172+
def is_use_dynamic_loss_scaling(self):
    """
    Report whether the loss scaling is updated dynamically.

    The answer is obtained from the parent class implementation.

    Returns:
        bool: False when a fixed loss scaling is used, True when the loss scaling is updated dynamically.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            use_dynamic_loss_scaling = scaler.is_use_dynamic_loss_scaling()
            print(use_dynamic_loss_scaling) # True
    """
    dynamic = super(GradScaler, self).is_use_dynamic_loss_scaling()
    return dynamic
194+
195+
def get_init_loss_scaling(self):
    """
    Get the initial loss scaling factor.

    The value is obtained from the parent class implementation.

    Returns:
        float: the initial loss scaling factor.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            init_loss_scaling = scaler.get_init_loss_scaling()
            print(init_loss_scaling) # 1024
    """
    current = super(GradScaler, self).get_init_loss_scaling()
    return current
217+
218+
def set_init_loss_scaling(self, new_init_loss_scaling):
    """
    Replace the initial loss scaling factor with `new_init_loss_scaling`.

    The update is forwarded to the parent class implementation.

    Args:
        new_init_loss_scaling(int): The value used to update the initial loss scaling factor.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_init_loss_scaling()) # 1024
            new_init_loss_scaling = 1000
            scaler.set_init_loss_scaling(new_init_loss_scaling)
            print(scaler.get_init_loss_scaling()) # 1000
    """
    parent = super(GradScaler, self)
    parent.set_init_loss_scaling(new_init_loss_scaling)
242+
243+
def get_incr_ratio(self):
    """
    Get the multiplier applied when the loss scaling is increased.

    The value is obtained from the parent class implementation.

    Returns:
        float: the multiplier to use when increasing the loss scaling.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            incr_ratio = scaler.get_incr_ratio()
            print(incr_ratio) # 2.0
    """
    ratio = super(GradScaler, self).get_incr_ratio()
    return ratio
265+
266+
def set_incr_ratio(self, new_incr_ratio):
    """
    Replace the multiplier applied when the loss scaling is increased with `new_incr_ratio`.

    `new_incr_ratio` is expected to be greater than 1.0. The update is
    forwarded to the parent class implementation.

    Args:
        new_incr_ratio(float): The value used to update the multiplier to use when increasing the loss scaling.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_incr_ratio()) # 2.0
            new_incr_ratio = 3.0
            scaler.set_incr_ratio(new_incr_ratio)
            print(scaler.get_incr_ratio()) # 3.0
    """
    parent = super(GradScaler, self)
    parent.set_incr_ratio(new_incr_ratio)
290+
291+
def get_decr_ratio(self):
    """
    Get the less-than-one multiplier applied when the loss scaling is decreased.

    The value is obtained from the parent class implementation.

    Returns:
        float: the less-than-one multiplier to use when decreasing the loss scaling.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            decr_ratio = scaler.get_decr_ratio()
            print(decr_ratio) # 0.5
    """
    ratio = super(GradScaler, self).get_decr_ratio()
    return ratio
313+
314+
def set_decr_ratio(self, new_decr_ratio):
    """
    Replace the less-than-one multiplier applied when the loss scaling is decreased with `new_decr_ratio`.

    `new_decr_ratio` is expected to be less than 1.0. The update is
    forwarded to the parent class implementation.

    Args:
        new_decr_ratio(float): The value used to update the less-than-one multiplier to use when decreasing the loss scaling.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_decr_ratio()) # 0.5
            new_decr_ratio = 0.1
            scaler.set_decr_ratio(new_decr_ratio)
            print(scaler.get_decr_ratio()) # 0.1
    """
    parent = super(GradScaler, self)
    parent.set_decr_ratio(new_decr_ratio)
338+
339+
def get_incr_every_n_steps(self):
    """
    Get the number `n` of consecutive steps with finite gradients after which the loss scaling is increased.

    The value is obtained from the parent class implementation.

    Returns:
        int: the number `n`; the loss scaling is increased every `n` consecutive steps with finite gradients.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            incr_every_n_steps = scaler.get_incr_every_n_steps()
            print(incr_every_n_steps) # 1000
    """
    steps = super(GradScaler, self).get_incr_every_n_steps()
    return steps
361+
362+
def set_incr_every_n_steps(self, new_incr_every_n_steps):
    """
    Replace the number `n` with `new_incr_every_n_steps`, where the loss scaling is increased every `n` consecutive steps with finite gradients.

    The update is forwarded to the parent class implementation.

    Args:
        new_incr_every_n_steps(int): The value used to update the number `n`; the loss scaling is increased every `n` consecutive steps with finite gradients.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_incr_every_n_steps()) # 1000
            new_incr_every_n_steps = 2000
            scaler.set_incr_every_n_steps(new_incr_every_n_steps)
            print(scaler.get_incr_every_n_steps()) # 2000
    """
    parent = super(GradScaler, self)
    parent.set_incr_every_n_steps(new_incr_every_n_steps)
386+
387+
def get_decr_every_n_nan_or_inf(self):
    """
    Get the number `n` of accumulated steps with nan or inf gradients after which the loss scaling is decreased.

    The value is obtained from the parent class implementation.

    Returns:
        int: the number `n`; the loss scaling is decreased every `n` accumulated steps with nan or inf gradients.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            decr_every_n_nan_or_inf = scaler.get_decr_every_n_nan_or_inf()
            print(decr_every_n_nan_or_inf) # 2
    """
    steps = super(GradScaler, self).get_decr_every_n_nan_or_inf()
    return steps
409+
410+
def set_decr_every_n_nan_or_inf(self, new_decr_every_n_nan_or_inf):
    """
    Replace the number `n` with `new_decr_every_n_nan_or_inf`, where the loss scaling is decreased every `n` accumulated steps with nan or inf gradients.

    The update is forwarded to the parent class implementation.

    Args:
        new_decr_every_n_nan_or_inf(int): The value used to update the number `n`; the loss scaling is decreased every `n` accumulated steps with nan or inf gradients.

    Examples:
        .. code-block:: python

            import paddle
            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_decr_every_n_nan_or_inf()) # 2
            new_decr_every_n_nan_or_inf = 3
            scaler.set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf)
            print(scaler.get_decr_every_n_nan_or_inf()) # 3
    """
    parent = super(GradScaler, self)
    parent.set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf)

0 commit comments

Comments
 (0)