Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
254 changes: 254 additions & 0 deletions python/paddle/amp/grad_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,257 @@ def minimize(self, optimizer, *args, **kwargs):
optimizer.clear_grad()
"""
return super(GradScaler, self).minimize(optimizer, *args, **kwargs)

def is_use_loss_scaling(self):
    """
    Return whether loss scaling is enabled for this scaler.

    Returns:
        bool: True if loss scaling is enabled (the `enable` argument
        passed when constructing the `GradScaler`), False otherwise.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            enable = scaler.is_use_loss_scaling()
            print(enable) # True
    """
    return super(GradScaler, self).is_use_loss_scaling()

def is_use_dynamic_loss_scaling(self):
    """
    Return whether dynamic loss scaling is enabled for this scaler.

    Returns:
        bool: True if dynamic loss scaling is used (the
        `use_dynamic_loss_scaling` argument passed when constructing the
        `GradScaler`), False otherwise.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            use_dynamic_loss_scaling = scaler.is_use_dynamic_loss_scaling()
            print(use_dynamic_loss_scaling) # True
    """
    return super(GradScaler, self).is_use_dynamic_loss_scaling()

def get_init_loss_scaling(self):
    """
    Return the initial loss scaling factor.

    Returns:
        float: The initial loss scaling factor (the `init_loss_scaling`
        argument passed when constructing the `GradScaler`, unless it
        was changed afterwards via `set_init_loss_scaling`).

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            init_loss_scaling = scaler.get_init_loss_scaling()
            print(init_loss_scaling) # 1024
    """
    return super(GradScaler, self).get_init_loss_scaling()

def set_init_loss_scaling(self, new_init_loss_scaling):
    """
    Set the initial loss scaling factor to `new_init_loss_scaling`.

    Args:
        new_init_loss_scaling(int): The value used to update the initial
            loss scaling factor.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_init_loss_scaling()) # 1024
            new_init_loss_scaling = 1000
            scaler.set_init_loss_scaling(new_init_loss_scaling)
            print(scaler.get_init_loss_scaling()) # 1000
    """
    super(GradScaler, self).set_init_loss_scaling(new_init_loss_scaling)

def get_incr_ratio(self):
    """
    Return the multiplier to use when increasing the loss scaling.

    Returns:
        float: The multiplier applied to the loss scaling each time it
        is increased.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            incr_ratio = scaler.get_incr_ratio()
            print(incr_ratio) # 2.0
    """
    return super(GradScaler, self).get_incr_ratio()

def set_incr_ratio(self, new_incr_ratio):
    """
    Set the multiplier to use when increasing the loss scaling to
    `new_incr_ratio`. `new_incr_ratio` should be > 1.0.

    Args:
        new_incr_ratio(float): The value used to update the multiplier
            applied when increasing the loss scaling.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_incr_ratio()) # 2.0
            new_incr_ratio = 3.0
            scaler.set_incr_ratio(new_incr_ratio)
            print(scaler.get_incr_ratio()) # 3.0
    """
    super(GradScaler, self).set_incr_ratio(new_incr_ratio)

def get_decr_ratio(self):
    """
    Return the less-than-one multiplier to use when decreasing the loss
    scaling.

    Returns:
        float: The multiplier applied to the loss scaling each time it
        is decreased.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            decr_ratio = scaler.get_decr_ratio()
            print(decr_ratio) # 0.5
    """
    return super(GradScaler, self).get_decr_ratio()

def set_decr_ratio(self, new_decr_ratio):
    """
    Set the less-than-one multiplier to use when decreasing the loss
    scaling to `new_decr_ratio`. `new_decr_ratio` should be < 1.0.

    Args:
        new_decr_ratio(float): The value used to update the multiplier
            applied when decreasing the loss scaling.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_decr_ratio()) # 0.5
            new_decr_ratio = 0.1
            scaler.set_decr_ratio(new_decr_ratio)
            print(scaler.get_decr_ratio()) # 0.1
    """
    super(GradScaler, self).set_decr_ratio(new_decr_ratio)

def get_incr_every_n_steps(self):
    """
    Return the number `n`: the loss scaling is increased every `n`
    consecutive steps with finite gradients.

    Returns:
        int: The number of consecutive finite-gradient steps between
        loss scaling increases.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            incr_every_n_steps = scaler.get_incr_every_n_steps()
            print(incr_every_n_steps) # 1000
    """
    return super(GradScaler, self).get_incr_every_n_steps()

def set_incr_every_n_steps(self, new_incr_every_n_steps):
    """
    Set the number `n` by `new_incr_every_n_steps`: the loss scaling is
    increased every `n` consecutive steps with finite gradients.

    Args:
        new_incr_every_n_steps(int): The new number of consecutive
            finite-gradient steps between loss scaling increases.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_incr_every_n_steps()) # 1000
            new_incr_every_n_steps = 2000
            scaler.set_incr_every_n_steps(new_incr_every_n_steps)
            print(scaler.get_incr_every_n_steps()) # 2000
    """
    super(GradScaler, self).set_incr_every_n_steps(new_incr_every_n_steps)

def get_decr_every_n_nan_or_inf(self):
    """
    Return the number `n`: the loss scaling is decreased every `n`
    accumulated steps with nan or inf gradients.

    Returns:
        int: The number of accumulated nan/inf-gradient steps between
        loss scaling decreases.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            decr_every_n_nan_or_inf = scaler.get_decr_every_n_nan_or_inf()
            print(decr_every_n_nan_or_inf) # 2
    """
    return super(GradScaler, self).get_decr_every_n_nan_or_inf()

def set_decr_every_n_nan_or_inf(self, new_decr_every_n_nan_or_inf):
    """
    Set the number `n` by `new_decr_every_n_nan_or_inf`: the loss
    scaling is decreased every `n` accumulated steps with nan or inf
    gradients.

    Args:
        new_decr_every_n_nan_or_inf(int): The new number of accumulated
            nan/inf-gradient steps between loss scaling decreases.

    Examples:
        .. code-block:: python

            import paddle

            scaler = paddle.amp.GradScaler(enable=True,
                                           init_loss_scaling=1024,
                                           incr_ratio=2.0,
                                           decr_ratio=0.5,
                                           incr_every_n_steps=1000,
                                           decr_every_n_nan_or_inf=2,
                                           use_dynamic_loss_scaling=True)
            print(scaler.get_decr_every_n_nan_or_inf()) # 2
            new_decr_every_n_nan_or_inf = 3
            scaler.set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf)
            print(scaler.get_decr_every_n_nan_or_inf()) # 3
    """
    super(GradScaler,
          self).set_decr_every_n_nan_or_inf(new_decr_every_n_nan_or_inf)
91 changes: 91 additions & 0 deletions python/paddle/fluid/dygraph/amp/loss_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,94 @@ def _update(self):
self._incr_count = 0

return

def is_use_loss_scaling(self):
    """
    Return whether loss scaling is enabled for this scaler.

    Returns:
        bool: True if loss scaling is enabled, False otherwise.
    """
    return self._enable

def is_use_dynamic_loss_scaling(self):
    """
    Return whether dynamic loss scaling is used.

    Returns:
        bool: True if dynamic loss scaling is used, False otherwise.
    """
    return self._use_dynamic_loss_scaling

def get_init_loss_scaling(self):
    """
    Return the initial loss scaling factor.

    Returns:
        float: The current initial loss scaling factor.
    """
    return self._init_loss_scaling

def set_init_loss_scaling(self, new_init_loss_scaling):
    """
    Set the initial loss scaling factor to `new_init_loss_scaling`.

    Args:
        new_init_loss_scaling(int): The value used to update the initial
            loss scaling factor.
    """
    self._init_loss_scaling = new_init_loss_scaling
    # Rebuild the scale tensor so it stays in sync with the updated
    # scalar factor.
    self._scale = to_variable(
        np.array([self._init_loss_scaling]).astype(np.float32))

def get_incr_ratio(self):
    """
    Return the multiplier to use when increasing the loss scaling.

    Returns:
        float: The multiplier applied when the loss scaling is increased.
    """
    return self._incr_ratio

def set_incr_ratio(self, new_incr_ratio):
    """
    Set the multiplier to use when increasing the loss scaling to
    `new_incr_ratio`. `new_incr_ratio` should be > 1.0.

    Args:
        new_incr_ratio(float): The value used to update the multiplier
            applied when increasing the loss scaling.
    """
    assert new_incr_ratio > 1.0, "The new_incr_ratio must be > 1.0."
    self._incr_ratio = new_incr_ratio

def get_decr_ratio(self):
    """
    Return the less-than-one multiplier to use when decreasing the loss
    scaling.

    Returns:
        float: The multiplier applied when the loss scaling is decreased.
    """
    return self._decr_ratio

def set_decr_ratio(self, new_decr_ratio):
    """
    Set the less-than-one multiplier to use when decreasing the loss
    scaling to `new_decr_ratio`. `new_decr_ratio` should be < 1.0.

    Args:
        new_decr_ratio(float): The value used to update the multiplier
            applied when decreasing the loss scaling.
    """
    assert new_decr_ratio < 1.0, "The new_decr_ratio must be < 1.0."
    self._decr_ratio = new_decr_ratio

def get_incr_every_n_steps(self):
    """
    Return the number `n`: the loss scaling is increased every `n`
    consecutive steps with finite gradients.

    Returns:
        int: The number of consecutive finite-gradient steps between
        loss scaling increases.
    """
    return self._incr_every_n_steps

def set_incr_every_n_steps(self, new_incr_every_n_steps):
    """
    Set the number `n` by `new_incr_every_n_steps`: the loss scaling is
    increased every `n` consecutive steps with finite gradients.

    Args:
        new_incr_every_n_steps(int): The new number of consecutive
            finite-gradient steps between loss scaling increases.
    """
    self._incr_every_n_steps = new_incr_every_n_steps

def get_decr_every_n_nan_or_inf(self):
    """
    Return the number `n`: the loss scaling is decreased every `n`
    accumulated steps with nan or inf gradients.

    Returns:
        int: The number of accumulated nan/inf-gradient steps between
        loss scaling decreases.
    """
    return self._decr_every_n_nan_or_inf

def set_decr_every_n_nan_or_inf(self, new_decr_every_n_nan_or_inf):
    """
    Set the number `n` by `new_decr_every_n_nan_or_inf`: the loss
    scaling is decreased every `n` accumulated steps with nan or inf
    gradients.

    Args:
        new_decr_every_n_nan_or_inf(int): The new number of accumulated
            nan/inf-gradient steps between loss scaling decreases.
    """
    self._decr_every_n_nan_or_inf = new_decr_every_n_nan_or_inf
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,34 @@ def test_nan_inf(self):
self.assertTrue(
np.array_equal(param.numpy(), params_init[param.name]))

def test_get_and_set(self):
    # Verify every GradScaler getter reflects the constructor arguments,
    # and that each setter round-trips through its matching getter.
    with fluid.dygraph.guard():
        scaler = paddle.amp.GradScaler(
            enable=True,
            init_loss_scaling=1024,
            incr_ratio=2.0,
            decr_ratio=0.5,
            incr_every_n_steps=1000,
            decr_every_n_nan_or_inf=2,
            use_dynamic_loss_scaling=True)
        # Direct assertions instead of `assertEqual(x == v, True)`:
        # clearer intent and better failure messages.
        self.assertTrue(scaler.is_use_loss_scaling())
        self.assertEqual(scaler.get_init_loss_scaling(), 1024)
        self.assertEqual(scaler.get_incr_ratio(), 2.0)
        self.assertEqual(scaler.get_decr_ratio(), 0.5)
        self.assertEqual(scaler.get_incr_every_n_steps(), 1000)
        self.assertEqual(scaler.get_decr_every_n_nan_or_inf(), 2)
        self.assertTrue(scaler.is_use_dynamic_loss_scaling())
        scaler.set_decr_every_n_nan_or_inf(4)
        self.assertEqual(scaler.get_decr_every_n_nan_or_inf(), 4)
        scaler.set_decr_ratio(0.1)
        self.assertEqual(scaler.get_decr_ratio(), 0.1)
        scaler.set_incr_every_n_steps(200)
        self.assertEqual(scaler.get_incr_every_n_steps(), 200)
        scaler.set_incr_ratio(3.0)
        self.assertEqual(scaler.get_incr_ratio(), 3.0)
        scaler.set_init_loss_scaling(100)
        self.assertEqual(scaler.get_init_loss_scaling(), 100)


def reader_decorator(reader):
def __reader__():
Expand Down