python/paddle/distributed/fleet/base/distributed_strategy.py (60 additions, 0 deletions)

@@ -307,6 +307,30 @@ def amp(self, flag):

@property
def amp_configs(self):
"""
Set automatic mixed precision training configurations. In general, amp has several settings that
can be configured through a dict.

**Notes**:
**init_loss_scaling(float)**: The initial loss scaling factor. Default 32768.
**use_dynamic_loss_scaling(bool)**: Whether to use dynamic loss scaling. Default True.
**incr_every_n_steps(int)**: Increases loss scaling every n consecutive steps with finite gradients. Default 1000.
**decr_every_n_nan_or_inf(int)**: Decreases loss scaling every n accumulated steps with nan or inf gradients. Default 2.
**incr_ratio(float)**: The multiplier to use when increasing the loss scaling. Default 2.0.
**decr_ratio(float)**: The less-than-one-multiplier to use when decreasing the loss scaling. Default 0.5.
**custom_white_list(list[str])**: A user-defined white list of ops that will always execute in fp16.
**custom_black_list(list[str])**: A user-defined black list of ops that are forbidden to execute in fp16.

Examples:
.. code-block:: python

import paddle.distributed.fleet as fleet
strategy = fleet.DistributedStrategy()
strategy.amp = True
strategy.amp_configs = {
"init_loss_scaling": 32768,
"custom_white_list": ['conv2d']}
"""
return get_msg_dict(self.strategy.amp_configs)

@amp_configs.setter
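To make the interplay of these knobs concrete, here is a minimal sketch of a dynamic loss-scaling update rule under the semantics described in the notes above. It is illustrative only, not Paddle's internal implementation, and the class name is hypothetical.

.. code-block:: python

    class DynamicLossScaler:
        """Illustrative sketch of the amp_configs loss-scaling semantics."""

        def __init__(self, init_loss_scaling=32768.0, incr_every_n_steps=1000,
                     decr_every_n_nan_or_inf=2, incr_ratio=2.0, decr_ratio=0.5):
            self.scale = init_loss_scaling
            self.incr_every_n_steps = incr_every_n_steps
            self.decr_every_n_nan_or_inf = decr_every_n_nan_or_inf
            self.incr_ratio = incr_ratio
            self.decr_ratio = decr_ratio
            self.good_steps = 0  # consecutive steps with finite gradients
            self.bad_steps = 0   # accumulated steps with nan/inf gradients

        def update(self, grads_are_finite):
            if grads_are_finite:
                self.good_steps += 1
                if self.good_steps >= self.incr_every_n_steps:
                    self.scale *= self.incr_ratio  # grow after a clean run
                    self.good_steps = 0
                self.bad_steps = 0
            else:
                self.bad_steps += 1
                if self.bad_steps >= self.decr_every_n_nan_or_inf:
                    self.scale *= self.decr_ratio  # shrink after overflows
                    self.bad_steps = 0
                self.good_steps = 0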
@@ -620,6 +644,20 @@ def localsgd_configs(self, configs):

@property
def dgc(self):
"""
Indicates whether Deep Gradient Compression training is enabled. For more details, please refer to
[Deep Gradient Compression](https://arxiv.org/abs/1712.01887).

Default Value: False

Examples:
.. code-block:: python

import paddle.distributed.fleet as fleet
strategy = fleet.DistributedStrategy()
strategy.dgc = True # by default this is false

"""
return self.strategy.dgc

@dgc.setter
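For intuition about what DGC does once enabled: the technique from the paper transmits only the largest-magnitude gradient entries each step and accumulates the rest locally until they grow large enough to be sent. A rough NumPy sketch of that idea (not Paddle's implementation; the function name is hypothetical):

.. code-block:: python

    import numpy as np

    def dgc_step(grad, residual, sparsity=0.999):
        """Send the top (1 - sparsity) fraction of entries; keep the rest."""
        acc = residual + grad                    # fold in locally accumulated gradient
        k = max(1, int(acc.size * (1.0 - sparsity)))
        thresh = np.partition(np.abs(acc).ravel(), -k)[-k]
        mask = np.abs(acc) >= thresh
        to_send = np.where(mask, acc, 0.0)       # sparse gradient to communicate
        new_residual = np.where(mask, 0.0, acc)  # unsent entries stay local
        return to_send, new_residual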
@@ -631,6 +669,28 @@ def dgc(self, flag):

@property
def dgc_configs(self):
"""
Set Deep Gradient Compression training configurations. In general, dgc has several settings that
can be configured through a dict.

**Notes**:
**rampup_begin_step(int)**: The step at which gradient compression begins. Default 0.
**rampup_step(int)**: The number of steps in the sparsity warm-up period. Default is 1.
For example, if the sparsity is [0.75, 0.9375, 0.984375, 0.996, 0.999] and the rampup_step is 100,
it will use 0.75 at steps 0~19, 0.9375 at steps 20~39, and so on. Once the end of the sparsity
array is reached, it keeps using 0.999 from then on.
**sparsity(list[str])**: Keep only the most important elements of the gradient tensor; the fraction
kept and transmitted is (1 - sparsity). Default is [0.999]. For example, if the sparsity is
[0.99, 0.999], the top [1%, 0.1%] most important elements will be transmitted.

Examples:
.. code-block:: python

import paddle.distributed.fleet as fleet
strategy = fleet.DistributedStrategy()
strategy.dgc = True
strategy.dgc_configs = {"rampup_begin_step": 1252}
"""
return get_msg_dict(self.strategy.dgc_configs)

@dgc_configs.setter
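The warm-up arithmetic in the notes can be checked in a few lines. Below is a hypothetical helper that reproduces the documented example (rampup_step=100 with a five-entry sparsity list gives 20 steps per entry); the behavior before rampup_begin_step is an assumption.

.. code-block:: python

    def current_sparsity(step, rampup_begin_step=0, rampup_step=100,
                         sparsity=(0.75, 0.9375, 0.984375, 0.996, 0.999)):
        """Illustrative only: which sparsity applies at a given step."""
        if step < rampup_begin_step:
            return 0.0  # assumed: no compression before warm-up begins
        steps_per_stage = rampup_step // len(sparsity)  # 100 // 5 == 20
        stage = (step - rampup_begin_step) // steps_per_stage
        return sparsity[min(stage, len(sparsity) - 1)]

    assert current_sparsity(0) == 0.75      # steps 0~19 use 0.75
    assert current_sparsity(25) == 0.9375   # steps 20~39 use 0.9375
    assert current_sparsity(500) == 0.999   # past the array, stay at 0.999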