Skip to content

Commit 1793a95

Browse files
FIX: Avoid caching in X-LoRA generate (#2384)
X-LoRA tests started failing after this transformers PR: huggingface/transformers#35724. The solution appears to be to disable caching completely when calling generate on the X-LoRA model. This also makes some previously xfail-ing tests pass. I tested this locally with transformers checked out before and after the mentioned PR, and the tests pass in both circumstances. I also tested changing the base model from "facebook/opt-125m" to "trl-internal-testing/tiny-random-LlamaForCausalLM", and the tests passed with both. Also, mark the X-LoRA test_save_load_functional test as flaky; it was marked as xfail beforehand, but it is in fact just flaky.
1 parent 1e2d6b5 commit 1793a95

2 files changed

Lines changed: 23 additions & 4 deletions

File tree

src/peft/tuners/xlora/model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ def _maybe_freeze_all_adapters(self):
314314
param.requires_grad = False
315315

316316
def generate(self, *args, **kwargs):
317+
kwargs["use_cache"] = False
317318
res = self.lora_model.generate(*args, **kwargs) # type: ignore
318319
# This is necessary because we use PeftModel.disable_adapter() which reenables the adapters
319320
self._maybe_freeze_all_adapters()

tests/test_xlora.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import os
16+
from functools import wraps
1617

1718
import huggingface_hub
1819
import pytest
@@ -25,6 +26,25 @@
2526
from peft.utils import infer_device
2627

2728

29+
def flaky(num_tries: int):
    """Decorator that retries a flaky test up to ``num_tries`` times.

    The wrapped function is re-run whenever it raises an ``AssertionError``;
    any other exception propagates immediately. If every attempt fails, an
    ``AssertionError`` is raised and chained (``from``) to the last underlying
    failure, so the original assertion traceback is preserved in the report.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_error = None
            for _ in range(num_tries):
                try:
                    return func(*args, **kwargs)
                except AssertionError as e:
                    # Log the intermediate failure and retry.
                    print(f"Failed test {func.__name__} with error: {e}")
                    last_error = e
            # All attempts failed: raise with the last failure attached as the
            # cause instead of silently discarding it.
            raise AssertionError(f"Failed test {func.__name__} after {num_tries} tries") from last_error

        return wrapper

    return decorator
46+
47+
2848
class TestXlora:
2949
torch_device = infer_device()
3050

@@ -128,8 +148,6 @@ def test_functional(self, tokenizer, model):
128148
)
129149
assert torch.isfinite(outputs[: inputs.shape[1] :]).all()
130150

131-
# TODO: fix the xfailing test
132-
@pytest.mark.xfail
133151
def test_scalings_logging_methods(self, tokenizer, model):
134152
model.enable_scalings_logging()
135153

@@ -182,8 +200,8 @@ def test_misc_methods(self, tokenizer, model):
182200

183201
assert str(model) is not None
184202

185-
# TODO: On CI (but not locally), this test seems to have become flaky with the latest transformers changes (v4.45).
186-
@pytest.mark.xfail
203+
# On CI (but not locally), this test is flaky since transformers v4.45.0.
204+
@flaky(num_tries=5)
187205
def test_save_load_functional(self, tokenizer, model, tmp_path):
188206
inputs = tokenizer.encode("Python is a", add_special_tokens=False, return_tensors="pt")
189207
outputs = model.generate(

0 commit comments

Comments
 (0)