
Commit ef54499

ugolowicastachowiczhabana authored and committed
[SW-209062] Disable default sdpa in Albert (#22)
Transformers v4.45 introduced SDPA as the default attention implementation in Albert. This caused a performance drop. This change adds Albert to the list of models that don't yet have an SDPA implementation on Gaudi and thus use eager attention.
1 parent 9a49200 commit ef54499

1 file changed

Lines changed: 1 addition & 1 deletion

File tree

optimum/habana/transformers/models/modeling_all_models.py

@@ -115,7 +115,7 @@ def gaudi_conv1d_forward(self, x):
     @classmethod
     def gaudi_check_and_enable_sdpa(cls, config, hard_check_only: bool = False) -> PretrainedConfig:
         # This model doesn't support SDPA in Gaudi yet, fallback to original code.
-        MODELS_ATTN_IMPLEMENTATION_EAGER = ["bart", "gpt_bigcode", "mistral", "mixtral", "wav2vec2", "roberta"]
+        MODELS_ATTN_IMPLEMENTATION_EAGER = ["albert", "bart", "gpt_bigcode", "mistral", "mixtral", "wav2vec2", "roberta"]

         if config.model_type in MODELS_ATTN_IMPLEMENTATION_EAGER:
             config._attn_implementation = "eager"
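The gating logic in the patch can be sketched in isolation. The snippet below is a minimal, self-contained illustration of the fallback behavior, not the Optimum Habana code itself: `DummyConfig` is a hypothetical stand-in for `PretrainedConfig`, and `check_and_enable_sdpa` mirrors only the list-membership check shown in the diff.

```python
# Minimal sketch of the eager-attention fallback, assuming a config object
# with `model_type` and `_attn_implementation` attributes (DummyConfig is a
# hypothetical stand-in for transformers' PretrainedConfig).

MODELS_ATTN_IMPLEMENTATION_EAGER = [
    "albert", "bart", "gpt_bigcode", "mistral", "mixtral", "wav2vec2", "roberta",
]


class DummyConfig:
    def __init__(self, model_type, attn_implementation="sdpa"):
        self.model_type = model_type
        self._attn_implementation = attn_implementation


def check_and_enable_sdpa(config):
    # Models without a Gaudi SDPA implementation fall back to eager attention;
    # all other models keep whatever implementation was requested.
    if config.model_type in MODELS_ATTN_IMPLEMENTATION_EAGER:
        config._attn_implementation = "eager"
    return config


albert = check_and_enable_sdpa(DummyConfig("albert"))
llama = check_and_enable_sdpa(DummyConfig("llama"))
print(albert._attn_implementation)  # eager (forced fallback)
print(llama._attn_implementation)   # sdpa (unchanged)
```

With this patch applied, loading an Albert model through Optimum Habana silently switches it to eager attention, so no user-side change is needed to avoid the SDPA performance drop.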

0 commit comments
