unslothai · danielhanchen · Oct 2, 2025 · Sep 30, 2025
diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py
@@ -1653,11 +1653,12 @@ def _prepare_model_for_qat(model: torch.nn.Module, qat_scheme: str) -> torch.nn.
     from torchao.quantization import (
         Float8DynamicActivationInt4WeightConfig,
         Float8DynamicActivationFloat8WeightConfig,
-        Int8DynamicActivationInt4WeightConfig,
+        Int8DynamicActivationIntxWeightConfig,
         Int4WeightOnlyConfig,
         PerRow,
         quantize_,
     )
+    from torchao.quantization.granularity import PerGroup
     from torchao.quantization.qat import QATConfig
     filter_fn = None
     if qat_scheme == "fp8-int4":
@@ -1668,7 +1669,7 @@ def _prepare_model_for_qat(model: torch.nn.Module, qat_scheme: str) -> torch.nn.
         base_config = Float8DynamicActivationFloat8WeightConfig(granularity=PerRow())
     elif qat_scheme == "int8-int4":
         group_size = 32
-        base_config = Int8DynamicActivationInt4WeightConfig(group_size=group_size)
+        base_config = Int8DynamicActivationIntxWeightConfig(weight_dtype=torch.int4, weight_granularity=PerGroup(group_size))
         filter_fn = lambda m, _: isinstance(m, torch.nn.Linear) and m.in_features >= group_size
     elif qat_scheme == "int4":
         group_size = 128