Improve QAT (quantization-aware training) support (#3446)

Etherll · web-flow · commit 136897870a38 · 2025-10-13T17:03:15.000-07:00
* Update save.py

* Update vision.py: add `qat_scheme` argument to `get_peft_model` and apply QAT after LoRA wrapping

* Update save.py
diff --git a/unsloth/models/vision.py b/unsloth/models/vision.py
@@ -28,7 +28,7 @@
 from ..kernels import (
     post_patch_loss_function,
 )
-from ._utils import __version__, importlib_version
+from ._utils import __version__, importlib_version, _prepare_model_for_qat
 from ._utils import *
 from ..save import patch_saving_functions
 from peft import LoraConfig, TaskType, get_peft_model as _get_peft_model
@@ -796,6 +796,7 @@ def get_peft_model(
         loftq_config               = {},
         task_type                  = TaskType.CAUSAL_LM,
         temporary_location         = "_unsloth_temporary_saved_buffers",
+        qat_scheme                 = None,
         **kwargs
     ):
         if os.environ.get("UNSLOTH_ENABLE_FULL_FINETUNING", "0") == "1":
@@ -871,6 +872,11 @@ def get_peft_model(
             use_gradient_checkpointing = use_gradient_checkpointing,
         )
         model = _get_peft_model(model, lora_config)
+        # Apply QAT + LoRA if specified
+        if qat_scheme is not None:
+            print("Unsloth: Applying QAT to mitigate quantization degradation")
+            model = _prepare_model_for_qat(model, qat_scheme)
+        pass
         # Fix LoraConfig.auto_mapping is None
         fix_lora_auto_mapping(model)
         # Enable gradients on modules which are trainable
diff --git a/unsloth/save.py b/unsloth/save.py
@@ -44,6 +44,7 @@
     pass
 pass
 from pathlib import Path
+from peft import PeftModelForCausalLM, PeftModel
 
 __all__ = [
     "print_quantization_methods",
@@ -2522,7 +2523,12 @@ def unsloth_save_pretrained_torchao(
     arguments["save_method"]  = "merged_16bit" # Must be 16bit
     del arguments["self"]
     del arguments["torchao_config"]
-    unsloth_generic_save(**arguments)
+
+    if not isinstance(self, PeftModelForCausalLM) and not isinstance(self, PeftModel):
+      self.save_pretrained(save_directory)
+      tokenizer.save_pretrained(save_directory)
+    else:
+      unsloth_generic_save(**arguments)
     for _ in range(3):
         gc.collect()