unslothai · danielhanchen · Jun 18, 2025 · Jun 18, 2025
diff --git a/unsloth/models/cohere.py b/unsloth/models/cohere.py
@@ -466,7 +466,7 @@ def pre_patch():
         CohereDecoderLayer   .forward = CohereDecoderLayer_fast_forward
         CohereModel          .forward = LlamaModel_fast_forward
         CohereForCausalLM    .forward = CausalLM_fast_forward(CohereModel_fast_forward_inference)
-        PeftModelForCausalLM .forward = PeftModelForCausalLM_fast_forward
+        PeftModelForCausalLM .forward = PeftModel_fast_forward
         fix_prepare_inputs_for_generation(CohereForCausalLM)
 
         import transformers.models.cohere.modeling_cohere

diff --git a/unsloth/models/gemma.py b/unsloth/models/gemma.py
@@ -332,7 +332,7 @@ def pre_patch():
         GemmaDecoderLayer   .forward = GemmaDecoderLayer_fast_forward
         GemmaModel          .forward = LlamaModel_fast_forward
         GemmaForCausalLM    .forward = CausalLM_fast_forward(GemmaModel_fast_forward_inference)
-        PeftModelForCausalLM.forward = PeftModelForCausalLM_fast_forward
+        PeftModelForCausalLM.forward = PeftModel_fast_forward
         fix_prepare_inputs_for_generation(GemmaForCausalLM)
 
         # Solves https://github.com/unslothai/unsloth/issues/168

diff --git a/unsloth/models/gemma2.py b/unsloth/models/gemma2.py
@@ -477,7 +477,7 @@ def pre_patch():
         Gemma2DecoderLayer   .forward = Gemma2DecoderLayer_fast_forward
         Gemma2Model          .forward = LlamaModel_fast_forward
         Gemma2ForCausalLM    .forward = CausalLM_fast_forward(Gemma2Model_fast_forward_inference)
-        PeftModelForCausalLM .forward = PeftModelForCausalLM_fast_forward
+        PeftModelForCausalLM .forward = PeftModel_fast_forward
         fix_prepare_inputs_for_generation(Gemma2ForCausalLM)
 
         # Solves https://github.com/unslothai/unsloth/issues/168

diff --git a/unsloth/models/granite.py b/unsloth/models/granite.py
@@ -468,7 +468,7 @@ def pre_patch():
         GraniteModel          .forward  = LlamaModel_fast_forward
         GraniteForCausalLM    .forward  = CausalLM_fast_forward(GraniteModel_fast_forward_inference)
         GraniteForCausalLM    .__init__ = patched_init(GraniteForCausalLM.__init__)
-        PeftModelForCausalLM .forward = PeftModelForCausalLM_fast_forward
+        PeftModelForCausalLM .forward = PeftModel_fast_forward
         fix_prepare_inputs_for_generation(GraniteForCausalLM)
 
         import transformers.models.granite.modeling_granite

diff --git a/unsloth/models/mistral.py b/unsloth/models/mistral.py
@@ -368,7 +368,7 @@ def pre_patch():
         MistralDecoderLayer   .forward = LlamaDecoderLayer_fast_forward
         MistralModel          .forward = LlamaModel_fast_forward
         MistralForCausalLM    .forward = MistralForCausalLM_fast_forward
-        PeftModelForCausalLM  .forward = PeftModelForCausalLM_fast_forward
+        PeftModelForCausalLM  .forward = PeftModel_fast_forward
         fix_prepare_inputs_for_generation(MistralForCausalLM)
 
         # Solves https://github.com/unslothai/unsloth/issues/168

diff --git a/unsloth/models/qwen2.py b/unsloth/models/qwen2.py
@@ -55,7 +55,7 @@ def pre_patch():
         Qwen2DecoderLayer   .forward = LlamaDecoderLayer_fast_forward
         Qwen2Model          .forward = LlamaModel_fast_forward
         Qwen2ForCausalLM    .forward = CausalLM_fast_forward(LlamaModel_fast_forward_inference)
-        PeftModelForCausalLM.forward = PeftModelForCausalLM_fast_forward
+        PeftModelForCausalLM.forward = PeftModel_fast_forward
         fix_prepare_inputs_for_generation(Qwen2ForCausalLM)
 
         # Solves https://github.com/unslothai/unsloth/issues/168

diff --git a/unsloth/models/qwen3.py b/unsloth/models/qwen3.py
@@ -387,7 +387,7 @@ def pre_patch():
         Qwen3DecoderLayer   .forward = LlamaDecoderLayer_fast_forward
         Qwen3Model          .forward = LlamaModel_fast_forward
         Qwen3ForCausalLM    .forward = CausalLM_fast_forward(_LlamaModel_fast_forward_inference(Qwen3Attention_fast_forward_inference))
-        PeftModelForCausalLM.forward = PeftModelForCausalLM_fast_forward
+        PeftModelForCausalLM.forward = PeftModel_fast_forward
         fix_prepare_inputs_for_generation(Qwen3ForCausalLM)
 
         # Solves https://github.com/unslothai/unsloth/issues/168

diff --git a/unsloth/models/qwen3_moe.py b/unsloth/models/qwen3_moe.py
@@ -177,7 +177,7 @@ def pre_patch():
         Qwen3MoeDecoderLayer   .forward = Qwen3MoeDecoderLayer_fast_forward
         Qwen3MoeModel          .forward = LlamaModel_fast_forward
         Qwen3MoeForCausalLM    .forward = CausalLM_fast_forward(LlamaModel_fast_forward_inference)
-        PeftModelForCausalLM.forward = PeftModelForCausalLM_fast_forward
+        PeftModelForCausalLM.forward = PeftModel_fast_forward
         fix_prepare_inputs_for_generation(Qwen3MoeForCausalLM)
 
         # Solves https://github.com/unslothai/unsloth/issues/168