Merged
4 changes: 1 addition & 3 deletions examples/modular-transformers/modeling_dummy_bert.py
@@ -302,9 +302,7 @@ def forward(
             # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling.
             # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create
             # a causal mask in case tgt_len == 1.
-            is_causal = (
-                True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False
-            )
+            is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1
 
             attn_output = torch.nn.functional.scaled_dot_product_attention(
                 query_layer,
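This is ruff's SIM210 fix: the `True if ... else False` ternary is redundant because the condition itself is the value. It is safe here since every operand in the chain (`self.is_decoder`, the `not ...`, the `is None` test, `tgt_len > 1`) is already a bool (assuming `config.is_decoder` is a plain bool, as it is in practice), so the `and` chain yields a bool; in general Python's `and` returns one of its operands rather than coercing. A quick standalone sketch of that distinction, not from the PR (the identical RoBERTa change below follows the same reasoning):

    # 'and' returns an operand, not necessarily a bool:
    assert (0 and True) == 0    # a falsy non-bool operand propagates unchanged
    assert ([] and True) == []
    # When every operand is itself a bool, the chain evaluates to a bool:
    assert (True and True and False) is False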
4 changes: 1 addition & 3 deletions examples/modular-transformers/modeling_roberta.py
@@ -305,9 +305,7 @@ def forward(
             # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling.
             # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create
             # a causal mask in case tgt_len == 1.
-            is_causal = (
-                True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False
-            )
+            is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1
 
             attn_output = torch.nn.functional.scaled_dot_product_attention(
                 query_layer,
7 changes: 5 additions & 2 deletions pyproject.toml
@@ -19,10 +19,13 @@ line-length = 119
 
 [tool.ruff.lint]
 # Never enforce `E501` (line length violations).
-ignore = ["C901", "E501", "E741", "F402", "F823"]
+# SIM300: Yoda condition detected
+# SIM212: Checks for if expressions that check against a negated condition.
+# SIM905: Consider using a list literal instead of `str.split`
+ignore = ["C901", "E501", "E741", "F402", "F823", "SIM1", "SIM300", "SIM212", "SIM905"]
 # RUF013: Checks for the use of implicit Optional
 # in type annotations when the default parameter value is None.
-select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208"]
+select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208","SIM"]
 extend-safe-fixes = ["UP006"]
 
 # Ignore import violations in all `__init__.py` files.
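For orientation, a minimal illustrative sketch of the main flake8-simplify patterns that the newly selected `SIM` rules rewrite throughout this diff (names invented for the example; rule codes as implemented in ruff):

    options = {"mode": "fast"}
    count = 3

    # SIM210: use the boolean expression directly instead of a redundant ternary.
    is_ready = True if count > 0 else False  # flagged
    is_ready = count > 0                     # simplified

    # SIM101: merge repeated isinstance() calls on the same object.
    x = 1.5
    numeric = isinstance(x, int) or isinstance(x, float)  # flagged
    numeric = isinstance(x, (int, float))                 # simplified

    # SIM401: use dict.get() with a default instead of a membership-test ternary.
    mode = options["mode"] if "mode" in options else "slow"  # flagged
    mode = options.get("mode", "slow")                       # simplified

    # SIM910: dict.get() already defaults to None.
    tools = options.get("tools", None)  # flagged
    tools = options.get("tools")        # simplified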
2 changes: 1 addition & 1 deletion src/transformers/commands/serving.py
@@ -897,7 +897,7 @@ def generate_chat_completion(self, req: dict) -> Generator[str, None, None]:
             inputs = processor.apply_chat_template(
                 processor_inputs,
                 add_generation_prompt=True,
-                tools=req.get("tools", None),
+                tools=req.get("tools"),
                 return_tensors="pt",
                 return_dict=True,
                 tokenize=True,
2 changes: 1 addition & 1 deletion src/transformers/data/data_collator.py
@@ -183,7 +183,7 @@ def tf_default_data_collator(features: list[InputDataClass]) -> dict[str, Any]:
     if label_col_name is not None:
         if isinstance(first[label_col_name], tf.Tensor):
             dtype = tf.int64 if first[label_col_name].dtype.is_integer else tf.float32
-        elif isinstance(first[label_col_name], np.ndarray) or isinstance(first[label_col_name], np.generic):
+        elif isinstance(first[label_col_name], (np.ndarray, np.generic)):
             dtype = tf.int64 if np.issubdtype(first[label_col_name].dtype, np.integer) else tf.float32
         elif isinstance(first[label_col_name], (tuple, list)):
             dtype = tf.int64 if isinstance(first[label_col_name][0], int) else tf.float32
4 changes: 2 additions & 2 deletions src/transformers/generation/utils.py
@@ -645,8 +645,8 @@ def prepare_inputs_for_generation(
 
         # If it's not defined, it means the model uses the new general mask API
         if causal_mask_creation_function is None:  # can't be found
-            token_type_ids = model_inputs.get("token_type_ids", None)
-            position_ids = model_inputs.get(position_ids_key, None)
+            token_type_ids = model_inputs.get("token_type_ids")
+            position_ids = model_inputs.get(position_ids_key)
             # Some models may overwrite the general one
             causal_mask_creation_function = getattr(self, "create_masks_for_generate", create_masks_for_generate)
             attention_mask = causal_mask_creation_function(
2 changes: 1 addition & 1 deletion src/transformers/integrations/flex_attention.py
@@ -277,7 +277,7 @@ def score_mod(score, batch_idx, head_idx, q_idx, kv_idx):
     num_local_query_heads = query.shape[1]
 
     # When running TP this helps:
-    if not ((num_local_query_heads & (num_local_query_heads - 1)) == 0):
+    if (num_local_query_heads & (num_local_query_heads - 1)) != 0:
         key = repeat_kv(key, query.shape[1] // key.shape[1])
         value = repeat_kv(value, query.shape[1] // value.shape[1])
         enable_gqa = False
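The rewrite replaces `not (x == 0)` with `x != 0` (ruff's SIM201). The check itself is the classic power-of-two bit trick: for `n > 0`, `n & (n - 1)` clears the lowest set bit, so it is zero exactly when `n` has a single bit set; the fallback to `repeat_kv` therefore triggers whenever the local query-head count is not a power of two. A quick standalone sketch:

    def is_power_of_two(n: int) -> bool:
        # n & (n - 1) clears the lowest set bit; powers of two have exactly one set bit.
        return n > 0 and (n & (n - 1)) == 0

    assert [is_power_of_two(n) for n in (1, 2, 3, 4, 6, 8)] == [True, True, False, True, False, True]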
2 changes: 1 addition & 1 deletion src/transformers/integrations/vptq.py
@@ -45,7 +45,7 @@ def replace_with_vptq_linear(
         should not be passed by the user.
     """
 
-    modules_to_not_convert = ["lm_head"] if not modules_to_not_convert else modules_to_not_convert
+    modules_to_not_convert = modules_to_not_convert if modules_to_not_convert else ["lm_head"]
 
     for name, module in model.named_children():
         if current_key_name is None:
2 changes: 1 addition & 1 deletion src/transformers/keras_callbacks.py
@@ -167,7 +167,7 @@ def _postprocess_predictions_or_labels(self, inputs):
             # If it's a dict with only one key, just return the array
             if len(outputs) == 1:
                 outputs = list(outputs.values())[0]
-        elif isinstance(inputs[0], list) or isinstance(inputs[0], tuple):
+        elif isinstance(inputs[0], (tuple, list)):
             outputs = []
             for input_list in zip(*inputs):
                 outputs.append(self._concatenate_batches(input_list))
@@ -81,9 +81,7 @@ def __init__(
         self.vision_feature_layer = vision_feature_layer
 
         if isinstance(vision_config, dict):
-            vision_config["model_type"] = (
-                vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model"
-            )
+            vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model")
             vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
         elif vision_config is None:
             vision_config = CONFIG_MAPPING["siglip_vision_model"](
@@ -99,7 +97,7 @@ def __init__(
         self.vision_config = vision_config
 
         if isinstance(text_config, dict):
-            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "cohere2"
+            text_config["model_type"] = text_config.get("model_type", "cohere2")
             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
         elif text_config is None:
             text_config = CONFIG_MAPPING["cohere2"]()
@@ -148,7 +148,7 @@ def rewrite_dict_keys(d):
     # (1) remove word breaking symbol, (2) add word ending symbol where the word is not broken up,
     # e.g.: d = {'le@@': 5, 'tt@@': 6, 'er': 7} => {'le': 5, 'tt': 6, 'er</w>': 7}
     d2 = dict((re.sub(r"@@$", "", k), v) if k.endswith("@@") else (re.sub(r"$", "</w>", k), v) for k, v in d.items())
-    keep_keys = "<s> <pad> </s> <unk>".split()
+    keep_keys = ["<s>", "<pad>", "</s>", "<unk>"]
    # restore the special tokens
     for k in keep_keys:
         del d2[f"{k}</w>"]
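The list literal is equivalent to what the old `str.split()` call computed at runtime (the pattern SIM905 flags); note that SIM905 is also added to the `ignore` list in pyproject.toml above, so this looks like a one-off fix rather than an enforced rule:

    assert "<s> <pad> </s> <unk>".split() == ["<s>", "<pad>", "</s>", "<unk>"]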
2 changes: 1 addition & 1 deletion src/transformers/models/clvp/modeling_clvp.py
@@ -1300,7 +1300,7 @@ def _prepare_model_inputs(
 
         # Check if conditioning_embeds are provided or not, if yes then concatenate the bos_token_id at the end of the conditioning_embeds.
         # Then we must subtract the positional_ids because during the forward pass it will be added anyways, so we must cancel them out here.
-        conditioning_embeds = model_kwargs.get("conditioning_embeds", None)
+        conditioning_embeds = model_kwargs.get("conditioning_embeds")
 
         if conditioning_embeds is not None:
             mel_start_token_embedding = self.model.decoder.input_embeds_layer(
@@ -57,9 +57,7 @@ def __init__(
         self.alignment_intermediate_size = alignment_intermediate_size
 
         if isinstance(vision_config, dict):
-            vision_config["model_type"] = (
-                vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model"
-            )
+            vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model")
             vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
         elif vision_config is None:
             vision_config = CONFIG_MAPPING["siglip_vision_model"](
@@ -73,7 +71,7 @@ def __init__(
         self.vision_config = vision_config
 
         if isinstance(text_config, dict):
-            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "cohere2"
+            text_config["model_type"] = text_config.get("model_type", "cohere2")
             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
         elif text_config is None:
             text_config = CONFIG_MAPPING["cohere2"](tie_word_embeddings=True)
@@ -66,7 +66,7 @@ class DeepseekVLImageProcessorFast(BaseImageProcessorFast):
 
     def __init__(self, **kwargs: Unpack[DeepseekVLFastImageProcessorKwargs]):
         super().__init__(**kwargs)
-        if kwargs.get("image_mean", None) is None:
+        if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
             background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
@@ -93,11 +93,11 @@ class DeepseekVLHybridImageProcessorFast(BaseImageProcessorFast):
     high_res_resample = PILImageResampling.BICUBIC
 
     def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]):
-        if kwargs.get("image_mean", None) is None:
+        if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
             background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
-        if kwargs.get("high_res_image_mean", None) is None:
+        if kwargs.get("high_res_image_mean") is None:
             high_res_background_color = (127, 127, 127)
         else:
             high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")])
@@ -749,11 +749,11 @@ class DeepseekVLHybridImageProcessorFast(DeepseekVLImageProcessorFast):
     high_res_resample = PILImageResampling.BICUBIC
 
     def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]):
-        if kwargs.get("image_mean", None) is None:
+        if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
             background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
-        if kwargs.get("high_res_image_mean", None) is None:
+        if kwargs.get("high_res_image_mean") is None:
             high_res_background_color = (127, 127, 127)
         else:
             high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")])
@@ -79,7 +79,7 @@ def rewrite_dict_keys(d):
     # (1) remove word breaking symbol, (2) add word ending symbol where the word is not broken up,
     # e.g.: d = {'le@@': 5, 'tt@@': 6, 'er': 7} => {'le': 5, 'tt': 6, 'er</w>': 7}
     d2 = dict((re.sub(r"@@$", "", k), v) if k.endswith("@@") else (re.sub(r"$", "</w>", k), v) for k, v in d.items())
-    keep_keys = "<s> <pad> </s> <unk>".split()
+    keep_keys = ["<s>", "<pad>", "</s>", "<unk>"]
     # restore the special tokens
     for k in keep_keys:
         del d2[f"{k}</w>"]
@@ -71,7 +71,7 @@ class JanusImageProcessorFast(BaseImageProcessorFast):
     valid_kwargs = JanusFastImageProcessorKwargs
 
     def __init__(self, **kwargs: Unpack[JanusFastImageProcessorKwargs]):
-        if kwargs.get("image_mean", None) is None:
+        if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
             background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
4 changes: 1 addition & 3 deletions src/transformers/models/jetmoe/modeling_jetmoe.py
@@ -846,9 +846,7 @@ def _init_weights(self, module):
             module.weight.data.fill_(1.0)
         elif isinstance(module, JetMoeParallelExperts):
             module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
-        elif isinstance(module, JetMoeMoA):
-            module.bias.data.zero_()
-        elif isinstance(module, JetMoeMoE):
+        elif isinstance(module, (JetMoeMoA, JetMoeMoE)):
             module.bias.data.zero_()
 
 
@@ -260,7 +260,7 @@ def __init__(
         self.disable_custom_kernels = disable_custom_kernels
         # Text backbone
         if isinstance(text_config, dict):
-            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert"
+            text_config["model_type"] = text_config.get("model_type", "bert")
             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
         elif text_config is None:
             text_config = CONFIG_MAPPING["bert"]()
@@ -268,7 +268,7 @@ def __init__(
         self.disable_custom_kernels = disable_custom_kernels
         # Text backbone
         if isinstance(text_config, dict):
-            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert"
+            text_config["model_type"] = text_config.get("model_type", "bert")
             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
         elif text_config is None:
             text_config = CONFIG_MAPPING["bert"]()
6 changes: 1 addition & 5 deletions src/transformers/models/oneformer/modeling_oneformer.py
@@ -2794,11 +2794,7 @@ def _init_weights(self, module: nn.Module):
             nn.init.constant_(module.output_proj.bias.data, 0.0)
         elif isinstance(module, OneFormerPixelDecoder):
             nn.init.normal_(module.level_embed, std=0)
-        elif isinstance(module, OneFormerTransformerDecoderLayer):
-            for p in module.parameters():
-                if p.dim() > 1:
-                    nn.init.xavier_uniform_(p, gain=xavier_std)
-        elif isinstance(module, OneFormerTransformerDecoderQueryTransformer):
+        elif isinstance(module, (OneFormerTransformerDecoderLayer, OneFormerTransformerDecoderQueryTransformer)):
             for p in module.parameters():
                 if p.dim() > 1:
                     nn.init.xavier_uniform_(p, gain=xavier_std)
2 changes: 1 addition & 1 deletion src/transformers/pipelines/fill_mask.py
@@ -197,7 +197,7 @@ def get_target_ids(self, targets, top_k=None):
             vocab = {}
         target_ids = []
         for target in targets:
-            id_ = vocab.get(target, None)
+            id_ = vocab.get(target)
             if id_ is None:
                 input_ids = self.tokenizer(
                     target,
8 changes: 5 additions & 3 deletions src/transformers/pipelines/token_classification.py
@@ -427,9 +427,11 @@ def aggregate_overlapping_entities(self, entities):
             if previous_entity["start"] <= entity["start"] < previous_entity["end"]:
                 current_length = entity["end"] - entity["start"]
                 previous_length = previous_entity["end"] - previous_entity["start"]
-                if current_length > previous_length:
-                    previous_entity = entity
-                elif current_length == previous_length and entity["score"] > previous_entity["score"]:
+                if (
+                    current_length > previous_length
+                    or current_length == previous_length
+                    and entity["score"] > previous_entity["score"]
+                ):
                     previous_entity = entity
             else:
                 aggregated_entities.append(previous_entity)
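Merging the `if`/`elif` into one condition relies on Python's precedence: `and` binds tighter than `or`, so `a or b and c` parses as `a or (b and c)`, which reproduces the original two branches exactly (the tokenization_utils.py change further down uses the same pattern). A quick exhaustive check over hypothetical lengths and scores, not taken from the PR:

    import itertools

    for cl, pl, cs, ps in itertools.product((1, 2), (1, 2), (0.3, 0.7), (0.3, 0.7)):
        old = (cl > pl) or (cl == pl and cs > ps)  # the original if/elif pair
        new = cl > pl or cl == pl and cs > ps      # the merged condition
        assert old == new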
4 changes: 1 addition & 3 deletions src/transformers/testing_utils.py
@@ -2622,9 +2622,7 @@ def nested_simplify(obj, decimals=3):
         return nested_simplify(obj.tolist())
     elif isinstance(obj, Mapping):
         return {nested_simplify(k, decimals): nested_simplify(v, decimals) for k, v in obj.items()}
-    elif isinstance(obj, (str, int, np.int64)):
-        return obj
-    elif obj is None:
+    elif isinstance(obj, (str, int, np.int64)) or obj is None:
         return obj
     elif is_torch_available() and isinstance(obj, torch.Tensor):
         return nested_simplify(obj.tolist(), decimals)
4 changes: 1 addition & 3 deletions src/transformers/tokenization_mistral_common.py
@@ -1784,9 +1784,7 @@ def from_pretrained(
             pathlib_repo_file = Path(path)
             file_name = pathlib_repo_file.name
             suffix = "".join(pathlib_repo_file.suffixes)
-            if file_name == "tekken.json":
-                valid_tokenizer_files.append(file_name)
-            elif suffix in sentencepiece_suffixes:
+            if file_name == "tekken.json" or suffix in sentencepiece_suffixes:
                 valid_tokenizer_files.append(file_name)
 
         if len(valid_tokenizer_files) == 0:
8 changes: 5 additions & 3 deletions src/transformers/tokenization_utils.py
@@ -877,9 +877,11 @@ def get_input_ids(text):
 
         input_ids = []
         for ids_or_pair_ids in batch_text_or_text_pairs:
-            if not isinstance(ids_or_pair_ids, (list, tuple)):
-                ids, pair_ids = ids_or_pair_ids, None
-            elif is_split_into_words and not isinstance(ids_or_pair_ids[0], (list, tuple)):
+            if (
+                not isinstance(ids_or_pair_ids, (list, tuple))
+                or is_split_into_words
+                and not isinstance(ids_or_pair_ids[0], (list, tuple))
+            ):
                 ids, pair_ids = ids_or_pair_ids, None
             else:
                 ids, pair_ids = ids_or_pair_ids
14 changes: 3 additions & 11 deletions src/transformers/trainer_pt_utils.py
@@ -153,9 +153,7 @@ def find_batch_size(tensors):
             result = find_batch_size(value)
             if result is not None:
                 return result
-    elif isinstance(tensors, torch.Tensor):
-        return tensors.shape[0] if len(tensors.shape) >= 1 else None
-    elif isinstance(tensors, np.ndarray):
+    elif isinstance(tensors, (torch.Tensor, np.ndarray)):
         return tensors.shape[0] if len(tensors.shape) >= 1 else None
 
 
@@ -634,10 +632,7 @@ def __init__(
         self.batch_size = batch_size
         if lengths is None:
             model_input_name = model_input_name if model_input_name is not None else "input_ids"
-            if (
-                not (isinstance(dataset[0], dict) or isinstance(dataset[0], BatchEncoding))
-                or model_input_name not in dataset[0]
-            ):
+            if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]:
                 raise ValueError(
                     "Can only automatically infer lengths for datasets whose items are dictionaries with an "
                     f"'{model_input_name}' key."
@@ -697,10 +692,7 @@ def __init__(
 
         if lengths is None:
            model_input_name = model_input_name if model_input_name is not None else "input_ids"
-            if (
-                not (isinstance(dataset[0], dict) or isinstance(dataset[0], BatchEncoding))
-                or model_input_name not in dataset[0]
-            ):
+            if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]:
                 raise ValueError(
                     "Can only automatically infer lengths for datasets whose items are dictionaries with an "
                     f"'{model_input_name}' key."
6 changes: 3 additions & 3 deletions src/transformers/utils/auto_docstring.py
@@ -1411,8 +1411,8 @@ def _process_regular_parameters(
                 param_type = f"[`{class_name}`]"
             else:
                 param_type = f"[`{param_type.split('.')[-1]}`]"
-        elif param_type == "" and False:  # TODO: Enforce typing for all parameters
-            print(f"🚨 {param_name} for {func.__qualname__} in file {func.__code__.co_filename} has no type")
+        # elif param_type == "" and False:  # TODO: Enforce typing for all parameters
+        #     print(f"🚨 {param_name} for {func.__qualname__} in file {func.__code__.co_filename} has no type")
Comment on lines +1414 to +1415:

Contributor: cc @yonigozlan to make sure it's a valid change

Member: Not sure what happened here, must have left this when debugging. We can leave it like that and I'll check in another PR if we should enable this back and make it a hard requirement (raise an error)
         param_type = param_type if "`" in param_type else f"`{param_type}`"
         # Format the parameter docstring
         if additional_info:
@@ -1840,7 +1840,7 @@ def auto_class_docstring(cls, custom_intro=None, custom_args=None, checkpoint=No
         docstring += set_min_indent(f"\n{docstring_init}", indent_level)
     elif is_dataclass:
         # No init function, we have a data class
-        docstring += "\nArgs:\n" if not docstring_args else docstring_args
+        docstring += docstring_args if docstring_args else "\nArgs:\n"
         source_args_dict = get_args_doc_from_source(ModelOutputArgs)
         doc_class = cls.__doc__ if cls.__doc__ else ""
         documented_kwargs, _ = parse_docstring(doc_class)
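For clarity on the first hunk above: the removed `elif` branch was already dead, since `param_type == "" and False` always evaluates falsy and the `print` could never run (the pattern ruff reports as SIM223), so commenting it out is behavior-preserving pending the follow-up the reviewer mentions. A standalone sketch:

    param_type = ""
    # `<expr> and False` always evaluates to a falsy value, so this branch is unreachable:
    if param_type == "" and False:
        raise AssertionError("never reached")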