bghira · bghira · Oct 26, 2025 · Oct 26, 2025 · Oct 26, 2025 · Oct 26, 2025
diff --git a/simpletuner/helpers/configuration/cmd_args.py b/simpletuner/helpers/configuration/cmd_args.py
@@ -837,6 +837,52 @@ def _set_tf32(enabled: bool) -> None:
     elif args.sana_complex_human_instruction == "None":
         args.sana_complex_human_instruction = None
 
+    # Validation adapter options: blank strings are normalised to None and defaults are applied.
+    if isinstance(getattr(args, "validation_adapter_path", None), str):
+        candidate = args.validation_adapter_path.strip()
+        args.validation_adapter_path = candidate or None
+
+    if getattr(args, "validation_adapter_config", None):
+        args.validation_adapter_config = _parse_json_like_option(
+            args.validation_adapter_config,
+            "--validation_adapter_config",
+        )
+
+    # getattr here too: the checks above tolerate a missing attribute, so this one must not raise AttributeError.
+    if getattr(args, "validation_adapter_path", None) and getattr(args, "validation_adapter_config", None):
+        raise ValueError("Provide either --validation_adapter_path or --validation_adapter_config, not both.")
+
+    if isinstance(getattr(args, "validation_adapter_name", None), str):
+        candidate = args.validation_adapter_name.strip()
+        args.validation_adapter_name = candidate or None
+
+    # Unset, blank or the literal "None" strength falls back to 1.0; anything else must parse as a positive float.
+    strength_value = getattr(args, "validation_adapter_strength", None)
+    if strength_value is None or strength_value in ("", "None"):
+        args.validation_adapter_strength = 1.0
+    else:
+        try:
+            strength = float(strength_value)
+        except (TypeError, ValueError):
+            raise ValueError(f"Invalid --validation_adapter_strength value: {strength_value}") from None
+        # `not strength > 0` (rather than `strength <= 0`) also rejects NaN, which fails every comparison.
+        if not strength > 0:
+            raise ValueError("--validation_adapter_strength must be greater than 0.")
+        args.validation_adapter_strength = strength
+
+    # Unset, blank or the literal "None" mode falls back to "adapter_only"; lower-case "none" is a distinct valid mode.
+    mode_value = getattr(args, "validation_adapter_mode", None)
+    if mode_value in (None, "", "None"):
+        args.validation_adapter_mode = "adapter_only"
+    else:
+        normalized_mode = str(mode_value).strip().lower()
+        valid_modes = {"adapter_only", "comparison", "none"}
+        if normalized_mode not in valid_modes:
+            raise ValueError(
+                f"Invalid --validation_adapter_mode '{mode_value}'. Expected one of: {', '.join(sorted(valid_modes))}."
+            )
+        args.validation_adapter_mode = normalized_mode
+
     if args.attention_mechanism != "diffusers" and not torch.cuda.is_available():
         warning_log("For non-CUDA systems, only Diffusers attention mechanism is officially supported.")
 

diff --git a/simpletuner/helpers/models/chroma/pipeline.py b/simpletuner/helpers/models/chroma/pipeline.py
@@ -45,6 +45,7 @@
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, T5EncoderModel, T5TokenizerFast
 
 from simpletuner.helpers.models.chroma.transformer import ChromaTransformer2DModel
+from simpletuner.helpers.utils.offloading import restore_offload_state, unpack_offload_state
 
 if is_torch_xla_available():
     import torch_xla.core.xla_model as xm
@@ -423,7 +424,12 @@ def load_lora_into_controlnet(
 
             # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks
             # otherwise loading LoRA weights will lead to an error
-            is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+            offload_state = cls._optionally_disable_offloading(_pipeline)
+            (
+                is_model_cpu_offload,
+                is_sequential_cpu_offload,
+                is_group_offload,
+            ) = unpack_offload_state(offload_state)
 
             peft_kwargs = {}
             if is_peft_version(">=", "0.13.1"):
@@ -447,7 +453,8 @@ def load_lora_into_controlnet(
             if warn_msg:
                 logger.warning(warn_msg)
 
-            cls._optionally_enable_offloading(is_model_cpu_offload, is_sequential_cpu_offload, _pipeline)
+            # Offload back.
+            restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
 
     @classmethod
     def load_lora_into_transformer(
@@ -527,7 +534,12 @@ def load_lora_into_transformer(
 
             # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks
             # otherwise loading LoRA weights will lead to an error
-            is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+            offload_state = cls._optionally_disable_offloading(_pipeline)
+            (
+                is_model_cpu_offload,
+                is_sequential_cpu_offload,
+                is_group_offload,
+            ) = unpack_offload_state(offload_state)
 
             peft_kwargs = {}
             if is_peft_version(">=", "0.13.1"):
@@ -551,7 +563,8 @@ def load_lora_into_transformer(
             if warn_msg:
                 logger.warning(warn_msg)
 
-            cls._optionally_enable_offloading(is_model_cpu_offload, is_sequential_cpu_offload, _pipeline)
+            # Offload back.
+            restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
 
     # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder
     def load_lora_into_text_encoder(

diff --git a/simpletuner/helpers/models/flux/pipeline.py b/simpletuner/helpers/models/flux/pipeline.py
@@ -66,6 +66,8 @@
     T5TokenizerFast,
 )
 
+from simpletuner.helpers.utils.offloading import restore_offload_state, unpack_offload_state
+
 if is_torch_xla_available():
     import torch_xla.core.xla_model as xm
 
@@ -521,7 +523,12 @@ def load_lora_into_controlnet(
                 adapter_name = get_adapter_name(controlnet)
 
             # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks
-            is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+            offload_state = cls._optionally_disable_offloading(_pipeline)
+            (
+                is_model_cpu_offload,
+                is_sequential_cpu_offload,
+                is_group_offload,
+            ) = unpack_offload_state(offload_state)
 
             peft_kwargs = {}
             if is_peft_version(">=", "0.13.1"):
@@ -534,10 +541,7 @@ def load_lora_into_controlnet(
                 logger.info(f"Loaded ControlNet LoRA with incompatible keys: {incompatible_keys}")
 
             # Offload back.
-            if is_model_cpu_offload:
-                _pipeline.enable_model_cpu_offload()
-            elif is_sequential_cpu_offload:
-                _pipeline.enable_sequential_cpu_offload()
+            restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
 
     @classmethod
     def load_lora_into_transformer(
@@ -617,7 +621,12 @@ def load_lora_into_transformer(
 
             # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks
             # otherwise loading LoRA weights will lead to an error
-            is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+            offload_state = cls._optionally_disable_offloading(_pipeline)
+            (
+                is_model_cpu_offload,
+                is_sequential_cpu_offload,
+                is_group_offload,
+            ) = unpack_offload_state(offload_state)
 
             peft_kwargs = {}
             if is_peft_version(">=", "0.13.1"):
@@ -652,10 +661,7 @@ def load_lora_into_transformer(
                 logger.warning(warn_msg)
 
             # Offload back.
-            if is_model_cpu_offload:
-                _pipeline.enable_model_cpu_offload()
-            elif is_sequential_cpu_offload:
-                _pipeline.enable_sequential_cpu_offload()
+            restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
             # Unsafe code />
 
     @classmethod
@@ -769,7 +775,12 @@ def load_lora_into_text_encoder(
                 if adapter_name is None:
                     adapter_name = get_adapter_name(text_encoder)
 
-                is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+                offload_state = cls._optionally_disable_offloading(_pipeline)
+                (
+                    is_model_cpu_offload,
+                    is_sequential_cpu_offload,
+                    is_group_offload,
+                ) = unpack_offload_state(offload_state)
 
                 # inject LoRA layers and load the state dict
                 # in transformers we automatically check whether the adapter name is already in use or not
@@ -786,10 +797,8 @@ def load_lora_into_text_encoder(
                 text_encoder.to(device=text_encoder.device, dtype=text_encoder.dtype)
 
                 # Offload back.
-                if is_model_cpu_offload:
-                    _pipeline.enable_model_cpu_offload()
-                elif is_sequential_cpu_offload:
-                    _pipeline.enable_sequential_cpu_offload()
+                restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
+
                 # Unsafe code />
 
     @classmethod

diff --git a/simpletuner/helpers/models/hidream/pipeline.py b/simpletuner/helpers/models/hidream/pipeline.py
@@ -46,6 +46,7 @@
 )
 
 from simpletuner.helpers.models.hidream.schedule import FlowUniPCMultistepScheduler
+from simpletuner.helpers.utils.offloading import restore_offload_state, unpack_offload_state
 
 if is_torch_xla_available():
     import torch_xla.core.xla_model as xm
@@ -486,7 +487,12 @@ def load_lora_into_controlnet(
                 adapter_name = get_adapter_name(controlnet)
 
             # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks
-            is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+            offload_state = cls._optionally_disable_offloading(_pipeline)
+            (
+                is_model_cpu_offload,
+                is_sequential_cpu_offload,
+                is_group_offload,
+            ) = unpack_offload_state(offload_state)
 
             peft_kwargs = {}
             if is_peft_version(">=", "0.13.1"):
@@ -499,10 +505,7 @@ def load_lora_into_controlnet(
                 logger.info(f"Loaded ControlNet LoRA with incompatible keys: {incompatible_keys}")
 
             # Offload back.
-            if is_model_cpu_offload:
-                _pipeline.enable_model_cpu_offload()
-            elif is_sequential_cpu_offload:
-                _pipeline.enable_sequential_cpu_offload()
+            restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
 
     @classmethod
     def load_lora_into_transformer(
@@ -638,7 +641,12 @@ def load_lora_into_text_encoder(
             if adapter_name is None:
                 adapter_name = get_adapter_name(text_encoder)
 
-            is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+            offload_state = cls._optionally_disable_offloading(_pipeline)
+            (
+                is_model_cpu_offload,
+                is_sequential_cpu_offload,
+                is_group_offload,
+            ) = unpack_offload_state(offload_state)
 
             # inject LoRA layers and load the state dict
             # in transformers we automatically check whether the adapter name is already in use or not
@@ -655,10 +663,7 @@ def load_lora_into_text_encoder(
             text_encoder.to(device=text_encoder.device, dtype=text_encoder.dtype)
 
             # Offload back.
-            if is_model_cpu_offload:
-                _pipeline.enable_model_cpu_offload()
-            elif is_sequential_cpu_offload:
-                _pipeline.enable_sequential_cpu_offload()
+            restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
 
     @classmethod
     def save_lora_weights(

diff --git a/simpletuner/helpers/models/sd3/pipeline.py b/simpletuner/helpers/models/sd3/pipeline.py
@@ -53,6 +53,8 @@
 from huggingface_hub.utils import validate_hf_hub_args
 from transformers import CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
 
+from simpletuner.helpers.utils.offloading import restore_offload_state, unpack_offload_state
+
 if is_torch_xla_available():
     import torch_xla.core.xla_model as xm
 
@@ -498,7 +500,12 @@ def load_lora_into_controlnet(
                 adapter_name = get_adapter_name(controlnet)
 
             # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks
-            is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+            offload_state = cls._optionally_disable_offloading(_pipeline)
+            (
+                is_model_cpu_offload,
+                is_sequential_cpu_offload,
+                is_group_offload,
+            ) = unpack_offload_state(offload_state)
 
             peft_kwargs = {}
             if is_peft_version(">=", "0.13.1"):
@@ -511,10 +518,7 @@ def load_lora_into_controlnet(
                 logger.info(f"Loaded ControlNet LoRA with incompatible keys: {incompatible_keys}")
 
             # Offload back.
-            if is_model_cpu_offload:
-                _pipeline.enable_model_cpu_offload()
-            elif is_sequential_cpu_offload:
-                _pipeline.enable_sequential_cpu_offload()
+            restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
 
     @classmethod
     def load_lora_into_transformer(
@@ -584,7 +588,12 @@ def load_lora_into_transformer(
 
             # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks
             # otherwise loading LoRA weights will lead to an error
-            is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+            offload_state = cls._optionally_disable_offloading(_pipeline)
+            (
+                is_model_cpu_offload,
+                is_sequential_cpu_offload,
+                is_group_offload,
+            ) = unpack_offload_state(offload_state)
 
             peft_kwargs = {}
             if is_peft_version(">=", "0.13.1"):
@@ -619,10 +628,7 @@ def load_lora_into_transformer(
                 logger.warning(warn_msg)
 
             # Offload back.
-            if is_model_cpu_offload:
-                _pipeline.enable_model_cpu_offload()
-            elif is_sequential_cpu_offload:
-                _pipeline.enable_sequential_cpu_offload()
+            restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
             # Unsafe code />
 
     @classmethod
@@ -736,7 +742,12 @@ def load_lora_into_text_encoder(
                 if adapter_name is None:
                     adapter_name = get_adapter_name(text_encoder)
 
-                is_model_cpu_offload, is_sequential_cpu_offload = cls._optionally_disable_offloading(_pipeline)
+                offload_state = cls._optionally_disable_offloading(_pipeline)
+                (
+                    is_model_cpu_offload,
+                    is_sequential_cpu_offload,
+                    is_group_offload,
+                ) = unpack_offload_state(offload_state)
 
                 # inject LoRA layers and load the state dict
                 # in transformers we automatically check whether the adapter name is already in use or not
@@ -753,10 +764,7 @@ def load_lora_into_text_encoder(
                 text_encoder.to(device=text_encoder.device, dtype=text_encoder.dtype)
 
                 # Offload back.
-                if is_model_cpu_offload:
-                    _pipeline.enable_model_cpu_offload()
-                elif is_sequential_cpu_offload:
-                    _pipeline.enable_sequential_cpu_offload()
+                restore_offload_state(_pipeline, is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload)
                 # Unsafe code />
 
     @classmethod