From 384e36f87685501a427da5d5f89bc90daa1a1bec Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 07:55:04 +0000 Subject: [PATCH 1/6] Initial plan From 20830159c25fa25a05c8316a260f442e5062ce7c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 08:07:26 +0000 Subject: [PATCH 2/6] Replace torch.cuda.amp.GradScaler with torch.amp.GradScaler Co-authored-by: vfdev-5 <2459423+vfdev-5@users.noreply.github.com> --- examples/cifar10/main.py | 5 ++-- .../benchmark_torch_cuda_amp.py | 5 ++-- examples/cifar10_qat/main.py | 5 ++-- .../CycleGAN_with_torch_cuda_amp.ipynb | 24 +++++++++---------- .../classification/imagenet/main.py | 5 ++-- .../segmentation/pascal_voc2012/main.py | 5 ++-- examples/transformers/main.py | 5 ++-- ignite/engine/__init__.py | 20 ++++++++-------- tests/ignite/engine/test_create_supervised.py | 14 +++++------ 9 files changed, 41 insertions(+), 47 deletions(-) diff --git a/examples/cifar10/main.py b/examples/cifar10/main.py index b8dbce5d9601..5ba566fec982 100644 --- a/examples/cifar10/main.py +++ b/examples/cifar10/main.py @@ -7,8 +7,7 @@ import torch.nn as nn import torch.optim as optim import utils -from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import autocast, GradScaler import ignite import ignite.distributed as idist @@ -289,7 +288,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con # - Two progress bars on epochs and optionally on iterations with_amp = config["with_amp"] - scaler = GradScaler(enabled=with_amp) + scaler = GradScaler('cuda', enabled=with_amp) def train_step(engine, batch): x, y = batch[0], batch[1] diff --git a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py index 746d7eb54c49..cc045995cb90 100644 --- a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py +++ b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py @@ -1,7 +1,6 @@ import fire import torch -from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import autocast, GradScaler from torch.nn import CrossEntropyLoss from torch.optim import SGD from torchvision.models import wide_resnet50_2 @@ -26,7 +25,7 @@ def main(dataset_path, batch_size=256, max_epochs=10): optimizer = SGD(model.parameters(), lr=0.01) criterion = CrossEntropyLoss().to(device) - scaler = GradScaler() + scaler = GradScaler('cuda') def train_step(engine, batch): x = convert_tensor(batch[0], device, non_blocking=True) diff --git a/examples/cifar10_qat/main.py b/examples/cifar10_qat/main.py index 7b8366a2a63f..8158f5d71d62 100644 --- a/examples/cifar10_qat/main.py +++ b/examples/cifar10_qat/main.py @@ -6,8 +6,7 @@ import torch.nn as nn import torch.optim as optim import utils -from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import autocast, GradScaler import ignite import ignite.distributed as idist @@ -273,7 +272,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con # - Two progress bars on epochs and optionally on iterations with_amp = config["with_amp"] - scaler = GradScaler(enabled=with_amp) + scaler = GradScaler('cuda', enabled=with_amp) def train_step(engine, batch): x, y = batch[0], batch[1] diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb index 
c687267d0d52..1228bfe40c09 100644 --- a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb +++ b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb @@ -875,10 +875,10 @@ "As suggested, we divide the objective by 2 while optimizing D, which slows down the rate at which D learns, relative to the rate of G. \n", "\n", "According to the paper:\n", - "- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) − 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n", - "- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) − 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n", - "- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) − 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n", - "- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) − 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$." + "- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n", + "- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n", + "- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) \u2212 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n", + "- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) \u2212 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$." ] }, { @@ -887,7 +887,7 @@ "id": "JE8dLeEfIl_Z" }, "source": [ - "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." + "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." 
] }, { @@ -896,7 +896,7 @@ "id": "vrJls4p-FRcA" }, "source": [ - "from torch.cuda.amp import GradScaler\n", + "from torch.amp import GradScaler\n", "from torch.amp import autocast\n", "\n", "from ignite.utils import convert_tensor\n", @@ -924,7 +924,7 @@ "\n", "\n", "def compute_loss_discriminator(decision_real, decision_fake):\n", - " # loss = mean (D_b(y) − 1)^2 + mean D_b(G(x))^2 \n", + " # loss = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2 \n", " loss = F.mse_loss(decision_fake, torch.zeros_like(decision_fake))\n", " loss += F.mse_loss(decision_real, torch.ones_like(decision_real))\n", " return loss\n", @@ -954,10 +954,10 @@ " decision_fake_b = discriminator_B(fake_b)\n", "\n", " # Compute loss for generators and update generators\n", - " # loss_a2b = GAN loss: mean (D_b(G(x)) − 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n", + " # loss_a2b = GAN loss: mean (D_b(G(x)) \u2212 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n", " loss_a2b = compute_loss_generator(decision_fake_b, real_a, rec_a, lambda_value) \n", "\n", - " # loss_b2a = GAN loss: mean (D_a(F(x)) − 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n", + " # loss_b2a = GAN loss: mean (D_a(F(x)) \u2212 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n", " loss_b2a = compute_loss_generator(decision_fake_a, real_b, rec_b, lambda_value)\n", "\n", " # total generators loss:\n", @@ -977,10 +977,10 @@ " decision_real_a, decision_fake_a = discriminator_forward_pass(discriminator_A, real_a, fake_a.detach(), fake_a_buffer) \n", " decision_real_b, decision_fake_b = discriminator_forward_pass(discriminator_B, real_b, fake_b.detach(), fake_b_buffer) \n", " # Compute loss for discriminators and update discriminators\n", - " # loss_a = mean (D_a(y) − 1)^2 + mean D_a(F(x))^2\n", + " # loss_a = mean (D_a(y) \u2212 1)^2 + mean D_a(F(x))^2\n", " loss_a = compute_loss_discriminator(decision_real_a, decision_fake_a)\n", "\n", - " # loss_b = mean (D_b(y) − 1)^2 + mean D_b(G(x))^2\n", + " # loss_b = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2\n", " loss_b = compute_loss_discriminator(decision_real_b, decision_fake_b)\n", " \n", " # total discriminators loss:\n", @@ -1578,4 +1578,4 @@ "outputs": [] } ] -} +} \ No newline at end of file diff --git a/examples/references/classification/imagenet/main.py b/examples/references/classification/imagenet/main.py index defb4ddc1510..e6eda1a4db95 100644 --- a/examples/references/classification/imagenet/main.py +++ b/examples/references/classification/imagenet/main.py @@ -6,8 +6,7 @@ import torch try: - from torch.amp import autocast - from torch.cuda.amp import GradScaler + from torch.amp import autocast, GradScaler except ImportError: raise RuntimeError("Please, use recent PyTorch version, e.g. 
>=1.12.0") @@ -140,7 +139,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w model_output_transform = config.get("model_output_transform", lambda x: x) with_amp = config.get("with_amp", True) - scaler = GradScaler(enabled=with_amp) + scaler = GradScaler('cuda', enabled=with_amp) def training_step(engine, batch): model.train() diff --git a/examples/references/segmentation/pascal_voc2012/main.py b/examples/references/segmentation/pascal_voc2012/main.py index b6fbc7ad494a..5e1208822686 100644 --- a/examples/references/segmentation/pascal_voc2012/main.py +++ b/examples/references/segmentation/pascal_voc2012/main.py @@ -6,8 +6,7 @@ import torch try: - from torch.amp import autocast - from torch.cuda.amp import GradScaler + from torch.amp import autocast, GradScaler except ImportError: raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") @@ -187,7 +186,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w model_output_transform = config.get("model_output_transform", lambda x: x) with_amp = config.get("with_amp", True) - scaler = GradScaler(enabled=with_amp) + scaler = GradScaler('cuda', enabled=with_amp) def forward_pass(batch): model.train() diff --git a/examples/transformers/main.py b/examples/transformers/main.py index f8118eabf90e..53f52a06a843 100644 --- a/examples/transformers/main.py +++ b/examples/transformers/main.py @@ -7,8 +7,7 @@ import torch.nn as nn import torch.optim as optim import utils -from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import autocast, GradScaler import ignite import ignite.distributed as idist @@ -298,7 +297,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con # - Two progress bars on epochs and optionally on iterations with_amp = config["with_amp"] - scaler = GradScaler(enabled=with_amp) + scaler = GradScaler('cuda', enabled=with_amp) def train_step(engine, batch): input_batch = batch[0] diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py index 6e82bc2f6bc7..b337e746f734 100644 --- a/ignite/engine/__init__.py +++ b/ignite/engine/__init__.py @@ -133,11 +133,11 @@ def supervised_training_step_amp( prepare_batch: Callable = _prepare_batch, model_transform: Callable[[Any], Any] = lambda output: output, output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(), - scaler: Optional["torch.cuda.amp.GradScaler"] = None, + scaler: Optional["torch.amp.GradScaler"] = None, gradient_accumulation_steps: int = 1, model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x), ) -> Callable: - """Factory function for supervised training using ``torch.cuda.amp``. + """Factory function for supervised training using ``torch.amp``. Args: model: the model to train. @@ -170,7 +170,7 @@ def supervised_training_step_amp( model = ... optimizer = ... loss_fn = ... 
- scaler = torch.cuda.amp.GradScaler(2**10) + scaler = torch.amp.GradScaler('cuda', 2**10) update_fn = supervised_training_step_amp(model, optimizer, loss_fn, 'cuda', scaler=scaler) trainer = Engine(update_fn) @@ -185,7 +185,7 @@ def supervised_training_step_amp( """ try: - from torch.amp import autocast + from torch.amp import autocast, GradScaler except ImportError: raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.") @@ -393,8 +393,8 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to def _check_arg( - on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.cuda.amp.GradScaler"]] -) -> Tuple[Optional[str], Optional["torch.cuda.amp.GradScaler"]]: + on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.amp.GradScaler"]] +) -> Tuple[Optional[str], Optional["torch.amp.GradScaler"]]: """Checking tpu, mps, amp and GradScaler instance combinations.""" if on_mps and amp_mode: raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.") @@ -410,10 +410,10 @@ def _check_arg( raise ValueError(f"scaler argument is {scaler}, but amp_mode is {amp_mode}. Consider using amp_mode='amp'.") elif amp_mode == "amp" and isinstance(scaler, bool): try: - from torch.cuda.amp import GradScaler + from torch.amp import GradScaler except ImportError: raise ImportError("Please install torch>=1.6.0 to use scaler argument.") - scaler = GradScaler(enabled=True) + scaler = GradScaler('cuda', enabled=True) if on_tpu: return "tpu", None @@ -434,7 +434,7 @@ def create_supervised_trainer( output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(), deterministic: bool = False, amp_mode: Optional[str] = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, gradient_accumulation_steps: int = 1, model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x), ) -> Engine: @@ -459,7 +459,7 @@ def create_supervised_trainer( :class:`~ignite.engine.deterministic.DeterministicEngine`, otherwise :class:`~ignite.engine.engine.Engine` (default: False). amp_mode: can be ``amp`` or ``apex``, model and optimizer will be casted to float16 using - `torch.cuda.amp `_ for ``amp`` and + `torch.amp `_ for ``amp`` and using `apex `_ for ``apex``. (default: None) scaler: GradScaler instance for gradient scaling if `torch>=1.6.0` and ``amp_mode`` is ``amp``. If ``amp_mode`` is ``apex``, this argument will be ignored. 
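
For readers following the docstring change above, a minimal, self-contained sketch of the updated example (the model, optimizer, and loss function below are placeholders standing in for the "..." in the docstring, and torch>=2.3.1 with a CUDA device is assumed):

    import torch
    import torch.nn as nn
    from torch.amp import GradScaler

    from ignite.engine import Engine, supervised_training_step_amp

    # Placeholder model/optimizer/loss for illustration only.
    model = nn.Linear(10, 2).to("cuda")
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    loss_fn = nn.CrossEntropyLoss()

    # GradScaler now comes from torch.amp; the second positional argument is init_scale.
    scaler = GradScaler("cuda", 2**10)
    update_fn = supervised_training_step_amp(model, optimizer, loss_fn, "cuda", scaler=scaler)
    trainer = Engine(update_fn)
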
diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py index ba42baddddae..d17f58867e7e 100644 --- a/tests/ignite/engine/test_create_supervised.py +++ b/tests/ignite/engine/test_create_supervised.py @@ -48,7 +48,7 @@ def _default_create_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, with_model_transform: bool = False, with_model_fn: bool = False, ): @@ -104,7 +104,7 @@ def _test_create_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, with_model_transform: bool = False, with_model_fn: bool = False, ): @@ -170,10 +170,10 @@ def _(): @pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0") def test_create_supervised_training_scalar_assignment(): with mock.patch("ignite.engine._check_arg") as check_arg_mock: - check_arg_mock.return_value = None, torch.cuda.amp.GradScaler(enabled=False) + check_arg_mock.return_value = None, torch.amp.GradScaler('cuda', enabled=False) trainer, _ = _default_create_supervised_trainer(model_device="cpu", trainer_device="cpu", scaler=True) assert hasattr(trainer.state, "scaler") - assert isinstance(trainer.state.scaler, torch.cuda.amp.GradScaler) + assert isinstance(trainer.state.scaler, torch.amp.GradScaler) def _test_create_mocked_supervised_trainer( @@ -181,7 +181,7 @@ def _test_create_mocked_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, ): with mock.patch("ignite.engine.supervised_training_step_amp") as training_step_amp_mock: with mock.patch("ignite.engine.supervised_training_step_apex") as training_step_apex_mock: @@ -462,7 +462,7 @@ def test_create_supervised_trainer_amp_error(mock_torch_cuda_amp_module): @pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0") def test_create_supervised_trainer_scaler_not_amp(): - scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available()) + scaler = torch.amp.GradScaler('cuda', enabled=torch.cuda.is_available()) with pytest.raises(ValueError, match=f"scaler argument is {scaler}, but amp_mode is None."): _test_create_supervised_trainer(amp_mode=None, scaler=scaler) @@ -540,7 +540,7 @@ def test_create_supervised_trainer_on_cuda_amp_scaler(): _test_create_mocked_supervised_trainer( model_device=model_device, trainer_device=trainer_device, amp_mode="amp", scaler=True ) - scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available()) + scaler = torch.amp.GradScaler('cuda', enabled=torch.cuda.is_available()) _test_create_supervised_trainer( gradient_accumulation_steps=1, model_device=model_device, From c430571bc57ea59df82893953c90ad3c640316c4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 08:36:35 +0000 Subject: [PATCH 3/6] Address review comments: remove cuda parameter from GradScaler, update PyTorch version requirements, revert notebook changes Co-authored-by: vfdev-5 <2459423+vfdev-5@users.noreply.github.com> --- examples/cifar10/main.py | 2 +- .../benchmark_torch_cuda_amp.py | 2 +- examples/cifar10_qat/main.py | 
2 +- .../CycleGAN_with_torch_cuda_amp.ipynb | 24 +++++++++---------- .../classification/imagenet/main.py | 4 ++-- .../segmentation/pascal_voc2012/main.py | 4 ++-- examples/transformers/main.py | 2 +- ignite/engine/__init__.py | 6 ++--- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/examples/cifar10/main.py b/examples/cifar10/main.py index 5ba566fec982..37ffdbb70250 100644 --- a/examples/cifar10/main.py +++ b/examples/cifar10/main.py @@ -288,7 +288,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con # - Two progress bars on epochs and optionally on iterations with_amp = config["with_amp"] - scaler = GradScaler('cuda', enabled=with_amp) + scaler = GradScaler(enabled=with_amp) def train_step(engine, batch): x, y = batch[0], batch[1] diff --git a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py index cc045995cb90..03d53752c4e3 100644 --- a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py +++ b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py @@ -25,7 +25,7 @@ def main(dataset_path, batch_size=256, max_epochs=10): optimizer = SGD(model.parameters(), lr=0.01) criterion = CrossEntropyLoss().to(device) - scaler = GradScaler('cuda') + scaler = GradScaler() def train_step(engine, batch): x = convert_tensor(batch[0], device, non_blocking=True) diff --git a/examples/cifar10_qat/main.py b/examples/cifar10_qat/main.py index 8158f5d71d62..48f38fa04a45 100644 --- a/examples/cifar10_qat/main.py +++ b/examples/cifar10_qat/main.py @@ -272,7 +272,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con # - Two progress bars on epochs and optionally on iterations with_amp = config["with_amp"] - scaler = GradScaler('cuda', enabled=with_amp) + scaler = GradScaler(enabled=with_amp) def train_step(engine, batch): x, y = batch[0], batch[1] diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb index 1228bfe40c09..c687267d0d52 100644 --- a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb +++ b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb @@ -875,10 +875,10 @@ "As suggested, we divide the objective by 2 while optimizing D, which slows down the rate at which D learns, relative to the rate of G. \n", "\n", "According to the paper:\n", - "- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n", - "- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n", - "- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) \u2212 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n", - "- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) \u2212 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$." 
+ "- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) − 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n", + "- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) − 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n", + "- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) − 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n", + "- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) − 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$." ] }, { @@ -887,7 +887,7 @@ "id": "JE8dLeEfIl_Z" }, "source": [ - "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." + "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." ] }, { @@ -896,7 +896,7 @@ "id": "vrJls4p-FRcA" }, "source": [ - "from torch.amp import GradScaler\n", + "from torch.cuda.amp import GradScaler\n", "from torch.amp import autocast\n", "\n", "from ignite.utils import convert_tensor\n", @@ -924,7 +924,7 @@ "\n", "\n", "def compute_loss_discriminator(decision_real, decision_fake):\n", - " # loss = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2 \n", + " # loss = mean (D_b(y) − 1)^2 + mean D_b(G(x))^2 \n", " loss = F.mse_loss(decision_fake, torch.zeros_like(decision_fake))\n", " loss += F.mse_loss(decision_real, torch.ones_like(decision_real))\n", " return loss\n", @@ -954,10 +954,10 @@ " decision_fake_b = discriminator_B(fake_b)\n", "\n", " # Compute loss for generators and update generators\n", - " # loss_a2b = GAN loss: mean (D_b(G(x)) \u2212 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n", + " # loss_a2b = GAN loss: mean (D_b(G(x)) − 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n", " loss_a2b = compute_loss_generator(decision_fake_b, real_a, rec_a, lambda_value) \n", "\n", - " # loss_b2a = GAN loss: mean (D_a(F(x)) \u2212 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n", + " # loss_b2a = GAN loss: mean (D_a(F(x)) − 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n", " loss_b2a = compute_loss_generator(decision_fake_a, real_b, rec_b, lambda_value)\n", "\n", " # total generators loss:\n", @@ -977,10 +977,10 @@ " decision_real_a, decision_fake_a = discriminator_forward_pass(discriminator_A, real_a, fake_a.detach(), fake_a_buffer) \n", " decision_real_b, decision_fake_b = discriminator_forward_pass(discriminator_B, real_b, fake_b.detach(), fake_b_buffer) \n", " # Compute loss for discriminators and update discriminators\n", - " # loss_a = mean (D_a(y) \u2212 1)^2 + mean D_a(F(x))^2\n", + " # loss_a = mean (D_a(y) − 1)^2 + mean D_a(F(x))^2\n", " loss_a = compute_loss_discriminator(decision_real_a, decision_fake_a)\n", "\n", - " # loss_b = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2\n", + " # loss_b = mean (D_b(y) − 1)^2 + mean D_b(G(x))^2\n", " loss_b = compute_loss_discriminator(decision_real_b, decision_fake_b)\n", " \n", " # total 
discriminators loss:\n", @@ -1578,4 +1578,4 @@ "outputs": [] } ] -} \ No newline at end of file +} diff --git a/examples/references/classification/imagenet/main.py b/examples/references/classification/imagenet/main.py index e6eda1a4db95..3069523c1115 100644 --- a/examples/references/classification/imagenet/main.py +++ b/examples/references/classification/imagenet/main.py @@ -8,7 +8,7 @@ try: from torch.amp import autocast, GradScaler except ImportError: - raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") + raise RuntimeError("Please, use recent PyTorch version, e.g. >=2.3.1") import dataflow as data import utils @@ -139,7 +139,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w model_output_transform = config.get("model_output_transform", lambda x: x) with_amp = config.get("with_amp", True) - scaler = GradScaler('cuda', enabled=with_amp) + scaler = GradScaler(enabled=with_amp) def training_step(engine, batch): model.train() diff --git a/examples/references/segmentation/pascal_voc2012/main.py b/examples/references/segmentation/pascal_voc2012/main.py index 5e1208822686..34f43b895879 100644 --- a/examples/references/segmentation/pascal_voc2012/main.py +++ b/examples/references/segmentation/pascal_voc2012/main.py @@ -8,7 +8,7 @@ try: from torch.amp import autocast, GradScaler except ImportError: - raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") + raise RuntimeError("Please, use recent PyTorch version, e.g. >=2.3.1") import dataflow as data import utils @@ -186,7 +186,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w model_output_transform = config.get("model_output_transform", lambda x: x) with_amp = config.get("with_amp", True) - scaler = GradScaler('cuda', enabled=with_amp) + scaler = GradScaler(enabled=with_amp) def forward_pass(batch): model.train() diff --git a/examples/transformers/main.py b/examples/transformers/main.py index 53f52a06a843..8dc90a7ef9e3 100644 --- a/examples/transformers/main.py +++ b/examples/transformers/main.py @@ -297,7 +297,7 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con # - Two progress bars on epochs and optionally on iterations with_amp = config["with_amp"] - scaler = GradScaler('cuda', enabled=with_amp) + scaler = GradScaler(enabled=with_amp) def train_step(engine, batch): input_batch = batch[0] diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py index b337e746f734..6300011fd79d 100644 --- a/ignite/engine/__init__.py +++ b/ignite/engine/__init__.py @@ -187,7 +187,7 @@ def supervised_training_step_amp( try: from torch.amp import autocast, GradScaler except ImportError: - raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.") + raise ImportError("Please install torch>=2.3.1 to use amp_mode='amp'.") if gradient_accumulation_steps <= 0: raise ValueError( @@ -412,8 +412,8 @@ def _check_arg( try: from torch.amp import GradScaler except ImportError: - raise ImportError("Please install torch>=1.6.0 to use scaler argument.") - scaler = GradScaler('cuda', enabled=True) + raise ImportError("Please install torch>=2.3.1 to use scaler argument.") + scaler = GradScaler(enabled=True) if on_tpu: return "tpu", None From 942d8f4613fbc6d90238cec6b74d5a3fb92e3889 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:01:21 +0000 Subject: [PATCH 4/6] Fix test failures: remove cuda parameter from test GradScaler instantiations and 
revert to correct PyTorch version requirements Co-authored-by: vfdev-5 <2459423+vfdev-5@users.noreply.github.com> --- examples/references/classification/imagenet/main.py | 2 +- examples/references/segmentation/pascal_voc2012/main.py | 2 +- ignite/engine/__init__.py | 4 ++-- tests/ignite/engine/test_create_supervised.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/references/classification/imagenet/main.py b/examples/references/classification/imagenet/main.py index 3069523c1115..efa0c8b146cb 100644 --- a/examples/references/classification/imagenet/main.py +++ b/examples/references/classification/imagenet/main.py @@ -8,7 +8,7 @@ try: from torch.amp import autocast, GradScaler except ImportError: - raise RuntimeError("Please, use recent PyTorch version, e.g. >=2.3.1") + raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") import dataflow as data import utils diff --git a/examples/references/segmentation/pascal_voc2012/main.py b/examples/references/segmentation/pascal_voc2012/main.py index 34f43b895879..b52dcd468ae9 100644 --- a/examples/references/segmentation/pascal_voc2012/main.py +++ b/examples/references/segmentation/pascal_voc2012/main.py @@ -8,7 +8,7 @@ try: from torch.amp import autocast, GradScaler except ImportError: - raise RuntimeError("Please, use recent PyTorch version, e.g. >=2.3.1") + raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") import dataflow as data import utils diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py index 6300011fd79d..e8c4e8b6d835 100644 --- a/ignite/engine/__init__.py +++ b/ignite/engine/__init__.py @@ -187,7 +187,7 @@ def supervised_training_step_amp( try: from torch.amp import autocast, GradScaler except ImportError: - raise ImportError("Please install torch>=2.3.1 to use amp_mode='amp'.") + raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.") if gradient_accumulation_steps <= 0: raise ValueError( @@ -412,7 +412,7 @@ def _check_arg( try: from torch.amp import GradScaler except ImportError: - raise ImportError("Please install torch>=2.3.1 to use scaler argument.") + raise ImportError("Please install torch>=1.6.0 to use scaler argument.") scaler = GradScaler(enabled=True) if on_tpu: diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py index d17f58867e7e..6bd759f9c2b5 100644 --- a/tests/ignite/engine/test_create_supervised.py +++ b/tests/ignite/engine/test_create_supervised.py @@ -170,7 +170,7 @@ def _(): @pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0") def test_create_supervised_training_scalar_assignment(): with mock.patch("ignite.engine._check_arg") as check_arg_mock: - check_arg_mock.return_value = None, torch.amp.GradScaler('cuda', enabled=False) + check_arg_mock.return_value = None, torch.amp.GradScaler(enabled=False) trainer, _ = _default_create_supervised_trainer(model_device="cpu", trainer_device="cpu", scaler=True) assert hasattr(trainer.state, "scaler") assert isinstance(trainer.state.scaler, torch.amp.GradScaler) @@ -462,7 +462,7 @@ def test_create_supervised_trainer_amp_error(mock_torch_cuda_amp_module): @pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0") def test_create_supervised_trainer_scaler_not_amp(): - scaler = torch.amp.GradScaler('cuda', enabled=torch.cuda.is_available()) + scaler = torch.amp.GradScaler(enabled=torch.cuda.is_available()) with pytest.raises(ValueError, match=f"scaler 
argument is {scaler}, but amp_mode is None."): _test_create_supervised_trainer(amp_mode=None, scaler=scaler) @@ -540,7 +540,7 @@ def test_create_supervised_trainer_on_cuda_amp_scaler(): _test_create_mocked_supervised_trainer( model_device=model_device, trainer_device=trainer_device, amp_mode="amp", scaler=True ) - scaler = torch.amp.GradScaler('cuda', enabled=torch.cuda.is_available()) + scaler = torch.amp.GradScaler(enabled=torch.cuda.is_available()) _test_create_supervised_trainer( gradient_accumulation_steps=1, model_device=model_device, From b97301c8204c3ce5a9364c7cb52af7ee772422dd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:41:19 +0000 Subject: [PATCH 5/6] Apply torch.cuda.amp.GradScaler to torch.amp.GradScaler replacements in README, docs, and notebooks Co-authored-by: vfdev-5 <2459423+vfdev-5@users.noreply.github.com> --- README.md | 2 +- docs/source/conf.py | 2 +- .../CycleGAN_with_torch_cuda_amp.ipynb | 25 +++++++++---------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 2cfa25e36810..20dd3156e078 100644 --- a/README.md +++ b/README.md @@ -397,7 +397,7 @@ Few pointers to get you started: - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/FastaiLRFinder_MNIST.ipynb) [Basic example of LR finder on MNIST](https://github.com/pytorch/ignite/blob/master/examples/notebooks/FastaiLRFinder_MNIST.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) [Benchmark mixed precision training on Cifar100: - torch.cuda.amp vs nvidia/apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) + torch.amp vs nvidia/apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/MNIST_on_TPU.ipynb) [MNIST training on a single TPU](https://github.com/pytorch/ignite/blob/master/examples/notebooks/MNIST_on_TPU.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1E9zJrptnLJ_PKhmaP5Vhb6DTVRvyrKHx) [CIFAR10 Training on multiple TPUs](https://github.com/pytorch/ignite/tree/master/examples/cifar10) diff --git a/docs/source/conf.py b/docs/source/conf.py index 6fdadc7b34b4..5f28cba5bd19 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -354,7 +354,7 @@ def run(self): ("py:class", "torch.optim.optimizer.Optimizer"), ("py:class", "torch.utils.data.dataset.Dataset"), ("py:class", "torch.utils.data.sampler.BatchSampler"), - ("py:class", "torch.cuda.amp.grad_scaler.GradScaler"), + ("py:class", "torch.amp.grad_scaler.GradScaler"), ("py:class", "torch.optim.lr_scheduler._LRScheduler"), ("py:class", "torch.optim.lr_scheduler.LRScheduler"), ("py:class", "torch.utils.data.dataloader.DataLoader"), diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb index c687267d0d52..4f280013b596 100644 --- a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb +++ 
b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb @@ -875,10 +875,10 @@ "As suggested, we divide the objective by 2 while optimizing D, which slows down the rate at which D learns, relative to the rate of G. \n", "\n", "According to the paper:\n", - "- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) − 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n", - "- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) − 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n", - "- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) − 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n", - "- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) − 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$." + "- generator A is trained minimize $\\text{mean}_{x \\in A}[(D_B(G(x)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{x \\in A}\\left[ |F(G(x)) - x|_1 \\right]$\n", + "- generator B is trained minimize $\\text{mean}_{y \\in B}[(D_A(F(y)) \u2212 1)^2]$ and cycle loss $\\text{mean}_{y \\in B}\\left[ |G(F(y)) - y|_1 \\right]$\n", + "- discriminators A is trained to minimize $\\text{mean}_{x \\in A}[(D_A(x) \u2212 1)^2] + \\text{mean}_{y \\in B}[D_A(F(y))^2]$.\n", + "- discriminator B is trained to minimize $\\text{mean}_{y \\in B}[(D_B(y) \u2212 1)^2] + \\text{mean}_{x \\in A}[D_B(G(x))^2]$." ] }, { @@ -887,7 +887,7 @@ "id": "JE8dLeEfIl_Z" }, "source": [ - "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." + "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." 
] }, { @@ -896,8 +896,7 @@ "id": "vrJls4p-FRcA" }, "source": [ - "from torch.cuda.amp import GradScaler\n", - "from torch.amp import autocast\n", + "from torch.amp import autocast, GradScaler\n", "\n", "from ignite.utils import convert_tensor\n", "import torch.nn.functional as F\n", @@ -924,7 +923,7 @@ "\n", "\n", "def compute_loss_discriminator(decision_real, decision_fake):\n", - " # loss = mean (D_b(y) − 1)^2 + mean D_b(G(x))^2 \n", + " # loss = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2 \n", " loss = F.mse_loss(decision_fake, torch.zeros_like(decision_fake))\n", " loss += F.mse_loss(decision_real, torch.ones_like(decision_real))\n", " return loss\n", @@ -954,10 +953,10 @@ " decision_fake_b = discriminator_B(fake_b)\n", "\n", " # Compute loss for generators and update generators\n", - " # loss_a2b = GAN loss: mean (D_b(G(x)) − 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n", + " # loss_a2b = GAN loss: mean (D_b(G(x)) \u2212 1)^2 + Forward cycle loss: || F(G(x)) - x ||_1 \n", " loss_a2b = compute_loss_generator(decision_fake_b, real_a, rec_a, lambda_value) \n", "\n", - " # loss_b2a = GAN loss: mean (D_a(F(x)) − 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n", + " # loss_b2a = GAN loss: mean (D_a(F(x)) \u2212 1)^2 + Backward cycle loss: || G(F(y)) - y ||_1\n", " loss_b2a = compute_loss_generator(decision_fake_a, real_b, rec_b, lambda_value)\n", "\n", " # total generators loss:\n", @@ -977,10 +976,10 @@ " decision_real_a, decision_fake_a = discriminator_forward_pass(discriminator_A, real_a, fake_a.detach(), fake_a_buffer) \n", " decision_real_b, decision_fake_b = discriminator_forward_pass(discriminator_B, real_b, fake_b.detach(), fake_b_buffer) \n", " # Compute loss for discriminators and update discriminators\n", - " # loss_a = mean (D_a(y) − 1)^2 + mean D_a(F(x))^2\n", + " # loss_a = mean (D_a(y) \u2212 1)^2 + mean D_a(F(x))^2\n", " loss_a = compute_loss_discriminator(decision_real_a, decision_fake_a)\n", "\n", - " # loss_b = mean (D_b(y) − 1)^2 + mean D_b(G(x))^2\n", + " # loss_b = mean (D_b(y) \u2212 1)^2 + mean D_b(G(x))^2\n", " loss_b = compute_loss_discriminator(decision_real_b, decision_fake_b)\n", " \n", " # total discriminators loss:\n", @@ -1578,4 +1577,4 @@ "outputs": [] } ] -} +} \ No newline at end of file From 6046bc8867d9707f957bf0787ac9507762711d14 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:54:45 +0000 Subject: [PATCH 6/6] Fix code style: add newline at end of CycleGAN notebook Co-authored-by: vfdev-5 <2459423+vfdev-5@users.noreply.github.com> --- examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb index 4f280013b596..df18d041b81e 100644 --- a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb +++ b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb @@ -1577,4 +1577,4 @@ "outputs": [] } ] -} \ No newline at end of file +}
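
Taken together, the series lands on the device-agnostic torch.amp API. For reference, a minimal sketch of the resulting training-step pattern (not part of the patches themselves; the model, optimizer, and batch are placeholders, and torch>=2.3.1 is assumed so that GradScaler is importable from torch.amp):

    import torch
    import torch.nn as nn
    from torch.optim import SGD

    try:
        from torch.amp import autocast, GradScaler
    except ImportError:
        raise RuntimeError("Please, use recent PyTorch version, e.g. >=2.3.1")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    with_amp = device == "cuda"  # mixed precision is only enabled when CUDA is available

    model = nn.Linear(10, 2).to(device)           # placeholder model
    optimizer = SGD(model.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss().to(device)

    # No 'cuda' positional argument: GradScaler defaults to the CUDA device,
    # and enabled=False makes it a no-op on CPU-only machines.
    scaler = GradScaler(enabled=with_amp)

    def train_step(batch):
        model.train()
        x, y = (t.to(device) for t in batch)
        optimizer.zero_grad()
        with autocast(device_type=device, enabled=with_amp):
            y_pred = model(x)
            loss = criterion(y_pred, y)
        scaler.scale(loss).backward()  # backward pass on the (possibly) scaled loss
        scaler.step(optimizer)         # unscale gradients and run the optimizer step
        scaler.update()                # adjust the scale factor for the next iteration
        return loss.item()

    # Example usage with a random batch:
    print(train_step((torch.randn(8, 10), torch.randint(0, 2, (8,)))))
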