@@ -407,14 +407,14 @@ class BaseLossScaleOptimizer(metaclass=LossScaleOptimizerMetaclass):
   Args:
     inner_optimizer: The `tf.keras.optimizers.Optimizer` or
       `tf.keras.optimizers.experimental.Optimizer` instance to wrap.
-    dynamic: Bool indicating whether dynamic loss scaling is used. Defaults to
-      True. If True, the loss scale will be dynamically updated over time
-      using an algorithm that keeps the loss scale at approximately its
-      optimal value. If False, a single fixed loss scale is used and
-      `initial_scale` must be specified, which is used as the loss scale.
+    dynamic: Bool indicating whether dynamic loss scaling is used. If `True`,
+      the loss scale will be dynamically updated over time using an algorithm
+      that keeps the loss scale at approximately its optimal value. If False,
+      a single fixed loss scale is used and `initial_scale` must be
+      specified, which is used as the loss scale.
       Recommended to keep as True, as choosing a fixed loss scale can be
       tricky. Currently, there is a small performance overhead to dynamic loss
-      scaling compared to fixed loss scaling.
+      scaling compared to fixed loss scaling. Defaults to `True`.
     initial_scale: The initial loss scale. If `dynamic` is True, this defaults
       to `2 ** 15`. If `dynamic` is False, this must be specified and acts as
       the sole loss scale, as the loss scale does not change over time. When
@@ -423,11 +423,11 @@ class BaseLossScaleOptimizer(metaclass=LossScaleOptimizerMetaclass):
       quickly than a loss scale that is too low gets raised.
     dynamic_growth_steps: With dynamic loss scaling, every
       `dynamic_growth_steps` steps with finite gradients, the loss scale is
-      doubled. Defaults to 2000. If a nonfinite gradient is encountered, the
+      doubled. If a nonfinite gradient is encountered, the
       count is reset back to zero, gradients are skipped that step, and the
       loss scale is halved. The count can be queried with
       `LossScaleOptimizer.dynamic_counter`. This argument can only be
-      specified if `dynamic` is True.
+      specified if `dynamic` is True. Defaults to `2000`.

   `LossScaleOptimizer` will occasionally skip applying gradients to the
   variables, in which case the trainable variables will not change that step.
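
For context (not part of the diff above), a minimal usage sketch of the arguments documented here might look as follows; the `SGD` inner optimizer and the fixed scale of 1024 are arbitrary illustrative choices, and the public entry point is assumed to be `tf.keras.mixed_precision.LossScaleOptimizer`.

import tensorflow as tf

# Dynamic loss scaling (dynamic=True, the default): the scale starts at
# 2 ** 15 and is doubled after every `dynamic_growth_steps` consecutive
# steps with finite gradients; a nonfinite gradient halves it and resets
# the counter.
opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(), dynamic=True, dynamic_growth_steps=2000)

# Fixed loss scaling: dynamic=False requires `initial_scale`, which then
# acts as the sole, unchanging loss scale.
fixed_opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(), dynamic=False, initial_scale=1024)

# The current state can be inspected via the wrapper's properties.
print(opt.dynamic_counter)   # finite-gradient steps since the scale last changed
print(fixed_opt.loss_scale)  # stays at 1024.0 for the lifetime of fixed_opt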