Skip to content

Commit 7377e41

Browse files
committed
format code
1 parent db811d7 commit 7377e41

12 files changed

Lines changed: 51 additions & 31 deletions

File tree

src/accelerate/optimizer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,10 @@ def zero_grad(self, set_to_none=None):
122122
self.optimizer.zero_grad()
123123

124124
def step(self, closure=None):
125-
if not self.gradient_state.is_xla_gradients_synced and self.accelerator_state.distributed_type == DistributedType.XLA:
125+
if (
126+
not self.gradient_state.is_xla_gradients_synced
127+
and self.accelerator_state.distributed_type == DistributedType.XLA
128+
):
126129
gradients = xm._fetch_gradients(self.optimizer)
127130
xm.all_reduce("sum", gradients, scale=1.0 / xm.xrt_world_size())
128131
self.gradient_state.is_xla_gradients_synced = True

src/accelerate/scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import warnings
1818

19-
from .state import AcceleratorState, DistributedType, GradientState
19+
from .state import AcceleratorState, GradientState
2020

2121

2222
warnings.filterwarnings("ignore", category=UserWarning, module="torch.optim.lr_scheduler")

src/accelerate/state.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -990,10 +990,10 @@ class GradientState:
990990
accumulation
991991
- **sync_with_dataloader** (`bool`) -- Whether the gradients should be synced at the end of the dataloader
992992
iteration and the number of total steps reset
993-
- **is_xla_gradients_synced** (`bool`) -- Whether the XLA gradients have been synchronized. It is initialized as
994-
False. Once gradients have been reduced before the optimizer step, this flag is set to True.
995-
Subsequently, after each step, the flag is reset to False. FSDP will always synchronize the gradients,
996-
hence is_xla_gradients_synced is always true.
993+
- **is_xla_gradients_synced** (`bool`) -- Whether the XLA gradients have been synchronized. It is initialized
994+
as false. Once gradients have been reduced before the optimizer step, this flag is set to true. Subsequently,
995+
after each step, the flag is reset to false. FSDP will always synchronize the gradients, hence
996+
is_xla_gradients_synced is always true.
997997
"""
998998

999999
_shared_state = SharedDict()

src/accelerate/test_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
require_multi_gpu,
1313
require_multi_xpu,
1414
require_non_cpu,
15-
require_no_torch_xla,
15+
require_non_torch_xla,
1616
require_single_device,
1717
require_single_gpu,
1818
require_single_xpu,

src/accelerate/test_utils/scripts/external_deps/test_metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from accelerate import Accelerator, DistributedType
2929
from accelerate.data_loader import DataLoaderDispatcher
3030
from accelerate.test_utils import RegressionDataset, RegressionModel, torch_device
31-
from accelerate.utils import is_torch_xla_available, set_seed
31+
from accelerate.utils import set_seed
3232

3333

3434
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"

src/accelerate/test_utils/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def require_tpu(test_case):
175175
return unittest.skipUnless(is_torch_xla_available(check_is_tpu=True), "test requires TPU")(test_case)
176176

177177

178-
def require_no_torch_xla(test_case):
178+
def require_non_torch_xla(test_case):
179179
"""
180180
Decorator marking a test as requiring an environment without TorchXLA. These tests are skipped when TorchXLA is
181181
available.

tests/fsdp/test_fsdp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
require_cuda,
3232
require_fsdp,
3333
require_multi_gpu,
34-
require_no_torch_xla,
34+
require_non_torch_xla,
3535
slow,
3636
)
3737
from accelerate.utils.constants import (
@@ -171,7 +171,7 @@ def test_cpu_offload(self):
171171

172172

173173
# Skip this test when TorchXLA is available because accelerate.launch does not support TorchXLA FSDP.
174-
@require_no_torch_xla
174+
@require_non_torch_xla
175175
@require_fsdp
176176
@require_multi_gpu
177177
@slow

tests/test_accelerator.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from accelerate.accelerator import Accelerator
1313
from accelerate.state import GradientState, PartialState
1414
from accelerate.test_utils import require_bnb, require_multi_gpu, slow
15-
from accelerate.test_utils.testing import AccelerateTestCase, require_cuda, require_no_torch_xla
15+
from accelerate.test_utils.testing import AccelerateTestCase, require_cuda, require_non_torch_xla
1616
from accelerate.utils import patch_environment
1717
from accelerate.utils.modeling import load_checkpoint_in_model
1818

@@ -63,7 +63,6 @@ def test_accelerator_can_be_reinstantiated(self):
6363
with self.assertRaises(ValueError):
6464
_ = Accelerator(cpu=True)
6565

66-
6766
def test_mutable_states(self):
6867
accelerator = Accelerator()
6968
state = GradientState()
@@ -105,7 +104,7 @@ def test_free_memory_dereferences_prepared_components(self):
105104
self.assertTrue(len(accelerator._schedulers) == 0)
106105
self.assertTrue(len(accelerator._dataloaders) == 0)
107106

108-
@require_no_torch_xla
107+
@require_non_torch_xla
109108
def test_env_var_device(self):
110109
"""Tests that setting the torch device with ACCELERATE_TORCH_DEVICE overrides default device."""
111110
PartialState._reset_state()
@@ -285,7 +284,7 @@ def test_is_accelerator_prepared(self):
285284
"Valid Dataloader is missing `_is_accelerator_prepared` or is set to `False`",
286285
)
287286

288-
@require_no_torch_xla
287+
@require_non_torch_xla
289288
@slow
290289
@require_bnb
291290
def test_accelerator_bnb(self):
@@ -302,7 +301,7 @@ def test_accelerator_bnb(self):
302301
# This should work
303302
model = accelerator.prepare(model)
304303

305-
@require_no_torch_xla
304+
@require_non_torch_xla
306305
@slow
307306
@require_bnb
308307
def test_accelerator_bnb_cpu_error(self):
@@ -328,7 +327,7 @@ def test_accelerator_bnb_cpu_error(self):
328327
with self.assertRaises(ValueError):
329328
model = accelerator.prepare(model)
330329

331-
@require_no_torch_xla
330+
@require_non_torch_xla
332331
@slow
333332
@require_bnb
334333
@require_multi_gpu
@@ -359,7 +358,7 @@ def test_accelerator_bnb_multi_gpu(self):
359358

360359
PartialState._reset_state()
361360

362-
@require_no_torch_xla
361+
@require_non_torch_xla
363362
@slow
364363
@require_bnb
365364
@require_multi_gpu

tests/test_big_modeling.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,14 @@
3030
load_checkpoint_and_dispatch,
3131
)
3232
from accelerate.hooks import remove_hook_from_submodules
33-
from accelerate.test_utils import require_bnb, require_cuda, require_mps, require_multi_gpu, require_no_torch_xla, slow
33+
from accelerate.test_utils import (
34+
require_bnb,
35+
require_cuda,
36+
require_mps,
37+
require_multi_gpu,
38+
require_non_torch_xla,
39+
slow,
40+
)
3441
from accelerate.utils import is_torch_version, offload_state_dict
3542

3643

@@ -708,7 +715,7 @@ def test_cpu_offload_with_hook(self):
708715
hook2.offload()
709716
self.assertEqual(model2.weight.device, torch.device("cpu"))
710717

711-
@require_no_torch_xla
718+
@require_non_torch_xla
712719
@slow
713720
@require_bnb
714721
@require_multi_gpu
@@ -740,7 +747,7 @@ def test_dispatch_model_bnb(self):
740747
self.assertTrue(model.h[-1].self_attention.query_key_value.weight.dtype == torch.int8)
741748
self.assertTrue(model.h[-1].self_attention.query_key_value.weight.device.index == 1)
742749

743-
@require_no_torch_xla
750+
@require_non_torch_xla
744751
@slow
745752
@require_bnb
746753
def test_dispatch_model_int8_simple(self):
@@ -803,7 +810,7 @@ def test_dispatch_model_int8_simple(self):
803810
self.assertTrue(model.h[0].self_attention.query_key_value.weight.dtype == torch.int8)
804811
self.assertTrue(model.h[0].self_attention.query_key_value.weight.device.index == 0)
805812

806-
@require_no_torch_xla
813+
@require_non_torch_xla
807814
@slow
808815
@require_bnb
809816
def test_dipatch_model_fp4_simple(self):

tests/test_multigpu.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import accelerate
2222
from accelerate import Accelerator
2323
from accelerate.big_modeling import dispatch_model
24-
from accelerate.test_utils import assert_exception, execute_subprocess_async, require_multi_gpu, require_no_torch_xla
24+
from accelerate.test_utils import assert_exception, execute_subprocess_async, require_multi_gpu, require_non_torch_xla
2525
from accelerate.utils import patch_environment
2626

2727

@@ -55,7 +55,7 @@ def test_pad_across_processes(self):
5555
with patch_environment(omp_num_threads=1):
5656
execute_subprocess_async(cmd, env=os.environ.copy())
5757

58-
@require_no_torch_xla
58+
@require_non_torch_xla
5959
@require_multi_gpu
6060
def test_distributed_data_loop(self):
6161
"""

0 commit comments

Comments (0)