Merged
4 changes: 1 addition & 3 deletions examples/modular-transformers/modeling_dummy_bert.py
@@ -302,9 +302,7 @@ def forward(
             # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling.
             # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create
             # a causal mask in case tgt_len == 1.
-            is_causal = (
-                True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False
-            )
+            is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1
 
             attn_output = torch.nn.functional.scaled_dot_product_attention(
                 query_layer,
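This is ruff's SIM210 fix: the `True if ... else False` ternary is redundant because the condition itself is the value. It is safe here since every operand in the chain (`self.is_decoder`, the `not ...`, the `is None` test, `tgt_len > 1`) is already a bool (assuming `config.is_decoder` is a plain bool, as it is in practice), so the `and` chain yields a bool; in general Python's `and` returns one of its operands rather than coercing. A quick standalone sketch of that distinction, not from the PR (the identical RoBERTa change below follows the same reasoning):

    # 'and' returns an operand, not necessarily a bool:
    assert (0 and True) == 0    # a falsy non-bool operand propagates unchanged
    assert ([] and True) == []
    # When every operand is itself a bool, the chain evaluates to a bool:
    assert (True and True and False) is False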
4 changes: 1 addition & 3 deletions examples/modular-transformers/modeling_roberta.py
@@ -305,9 +305,7 @@ def forward(
             # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling.
             # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create
             # a causal mask in case tgt_len == 1.
-            is_causal = (
-                True if self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1 else False
-            )
+            is_causal = self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1
 
             attn_output = torch.nn.functional.scaled_dot_product_attention(
                 query_layer,
7 changes: 5 additions & 2 deletions pyproject.toml
@@ -19,10 +19,13 @@ line-length = 119
 
 [tool.ruff.lint]
 # Never enforce `E501` (line length violations).
-ignore = ["C901", "E501", "E741", "F402", "F823"]
+# SIM300: Yoda condition detected
+# SIM212: Checks for if expressions that check against a negated condition.
+# SIM905: Consider using a list literal instead of `str.split`
+ignore = ["C901", "E501", "E741", "F402", "F823", "SIM1", "SIM300", "SIM212", "SIM905"]
 # RUF013: Checks for the use of implicit Optional
 # in type annotations when the default parameter value is None.
-select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208"]
+select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208","SIM"]
 extend-safe-fixes = ["UP006"]
 
 # Ignore import violations in all `__init__.py` files.
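For orientation, a minimal illustrative sketch of the main flake8-simplify patterns that the newly selected `SIM` rules rewrite throughout this diff (names invented for the example; rule codes as implemented in ruff):

    options = {"mode": "fast"}
    count = 3

    # SIM210: use the boolean expression directly instead of a redundant ternary.
    is_ready = True if count > 0 else False  # flagged
    is_ready = count > 0                     # simplified

    # SIM101: merge repeated isinstance() calls on the same object.
    x = 1.5
    numeric = isinstance(x, int) or isinstance(x, float)  # flagged
    numeric = isinstance(x, (int, float))                 # simplified

    # SIM401: use dict.get() with a default instead of a membership-test ternary.
    mode = options["mode"] if "mode" in options else "slow"  # flagged
    mode = options.get("mode", "slow")                       # simplified

    # SIM910: dict.get() already defaults to None.
    tools = options.get("tools", None)  # flagged
    tools = options.get("tools")        # simplified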
2 changes: 1 addition & 1 deletion src/transformers/commands/serving.py
@@ -897,7 +897,7 @@ def generate_chat_completion(self, req: dict) -> Generator[str, None, None]:
             inputs = processor.apply_chat_template(
                 processor_inputs,
                 add_generation_prompt=True,
-                tools=req.get("tools", None),
+                tools=req.get("tools"),
                 return_tensors="pt",
                 return_dict=True,
                 tokenize=True,
2 changes: 1 addition & 1 deletion src/transformers/data/data_collator.py
@@ -183,7 +183,7 @@ def tf_default_data_collator(features: list[InputDataClass]) -> dict[str, Any]:
     if label_col_name is not None:
         if isinstance(first[label_col_name], tf.Tensor):
             dtype = tf.int64 if first[label_col_name].dtype.is_integer else tf.float32
-        elif isinstance(first[label_col_name], np.ndarray) or isinstance(first[label_col_name], np.generic):
+        elif isinstance(first[label_col_name], (np.ndarray, np.generic)):
             dtype = tf.int64 if np.issubdtype(first[label_col_name].dtype, np.integer) else tf.float32
         elif isinstance(first[label_col_name], (tuple, list)):
             dtype = tf.int64 if isinstance(first[label_col_name][0], int) else tf.float32
4 changes: 2 additions & 2 deletions src/transformers/generation/utils.py
@@ -645,8 +645,8 @@ def prepare_inputs_for_generation(
 
         # If it's not defined, it means the model uses the new general mask API
         if causal_mask_creation_function is None:  # can't be found
-            token_type_ids = model_inputs.get("token_type_ids", None)
-            position_ids = model_inputs.get(position_ids_key, None)
+            token_type_ids = model_inputs.get("token_type_ids")
+            position_ids = model_inputs.get(position_ids_key)
             # Some models may overwrite the general one
             causal_mask_creation_function = getattr(self, "create_masks_for_generate", create_masks_for_generate)
             attention_mask = causal_mask_creation_function(
2 changes: 1 addition & 1 deletion src/transformers/integrations/flex_attention.py
@@ -277,7 +277,7 @@ def score_mod(score, batch_idx, head_idx, q_idx, kv_idx):
     num_local_query_heads = query.shape[1]
 
     # When running TP this helps:
-    if not ((num_local_query_heads & (num_local_query_heads - 1)) == 0):
+    if (num_local_query_heads & (num_local_query_heads - 1)) != 0:
         key = repeat_kv(key, query.shape[1] // key.shape[1])
         value = repeat_kv(value, query.shape[1] // value.shape[1])
         enable_gqa = False
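The rewrite replaces `not (x == 0)` with `x != 0` (ruff's SIM201). The check itself is the classic power-of-two bit trick: for `n > 0`, `n & (n - 1)` clears the lowest set bit, so it is zero exactly when `n` has a single bit set; the fallback to `repeat_kv` therefore triggers whenever the local query-head count is not a power of two. A quick standalone sketch:

    def is_power_of_two(n: int) -> bool:
        # n & (n - 1) clears the lowest set bit; powers of two have exactly one set bit.
        return n > 0 and (n & (n - 1)) == 0

    assert [is_power_of_two(n) for n in (1, 2, 3, 4, 6, 8)] == [True, True, False, True, False, True]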
2 changes: 1 addition & 1 deletion src/transformers/integrations/vptq.py
@@ -45,7 +45,7 @@ def replace_with_vptq_linear(
         should not be passed by the user.
     """
 
-    modules_to_not_convert = ["lm_head"] if not modules_to_not_convert else modules_to_not_convert
+    modules_to_not_convert = modules_to_not_convert if modules_to_not_convert else ["lm_head"]
 
     for name, module in model.named_children():
         if current_key_name is None:
2 changes: 1 addition & 1 deletion src/transformers/keras_callbacks.py
@@ -167,7 +167,7 @@ def _postprocess_predictions_or_labels(self, inputs):
             # If it's a dict with only one key, just return the array
             if len(outputs) == 1:
                 outputs = list(outputs.values())[0]
-        elif isinstance(inputs[0], list) or isinstance(inputs[0], tuple):
+        elif isinstance(inputs[0], (tuple, list)):
             outputs = []
             for input_list in zip(*inputs):
                 outputs.append(self._concatenate_batches(input_list))
@@ -81,9 +81,7 @@ def __init__(
         self.vision_feature_layer = vision_feature_layer
 
         if isinstance(vision_config, dict):
-            vision_config["model_type"] = (
-                vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model"
-            )
+            vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model")
             vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
         elif vision_config is None:
             vision_config = CONFIG_MAPPING["siglip_vision_model"](
@@ -99,7 +97,7 @@ def __init__(
         self.vision_config = vision_config
 
         if isinstance(text_config, dict):
-            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "cohere2"
+            text_config["model_type"] = text_config.get("model_type", "cohere2")
             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
         elif text_config is None:
             text_config = CONFIG_MAPPING["cohere2"]()
@@ -148,7 +148,7 @@ def rewrite_dict_keys(d):
     # (1) remove word breaking symbol, (2) add word ending symbol where the word is not broken up,
     # e.g.: d = {'le@@': 5, 'tt@@': 6, 'er': 7} => {'le': 5, 'tt': 6, 'er</w>': 7}
     d2 = dict((re.sub(r"@@$", "", k), v) if k.endswith("@@") else (re.sub(r"$", "</w>", k), v) for k, v in d.items())
-    keep_keys = "<s> <pad> </s> <unk>".split()
+    keep_keys = ["<s>", "<pad>", "</s>", "<unk>"]
    # restore the special tokens
     for k in keep_keys:
         del d2[f"{k}</w>"]
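The list literal is equivalent to what the old `str.split()` call computed at runtime (the pattern SIM905 flags); note that SIM905 is also added to the `ignore` list in pyproject.toml above, so this looks like a one-off fix rather than an enforced rule:

    assert "<s> <pad> </s> <unk>".split() == ["<s>", "<pad>", "</s>", "<unk>"]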
2 changes: 1 addition & 1 deletion src/transformers/models/clvp/modeling_clvp.py
@@ -1300,7 +1300,7 @@ def _prepare_model_inputs(
 
         # Check if conditioning_embeds are provided or not, if yes then concatenate the bos_token_id at the end of the conditioning_embeds.
         # Then we must subtract the positional_ids because during the forward pass it will be added anyways, so we must cancel them out here.
-        conditioning_embeds = model_kwargs.get("conditioning_embeds", None)
+        conditioning_embeds = model_kwargs.get("conditioning_embeds")
 
         if conditioning_embeds is not None:
             mel_start_token_embedding = self.model.decoder.input_embeds_layer(
@@ -57,9 +57,7 @@ def __init__(
         self.alignment_intermediate_size = alignment_intermediate_size
 
         if isinstance(vision_config, dict):
-            vision_config["model_type"] = (
-                vision_config["model_type"] if "model_type" in vision_config else "siglip_vision_model"
-            )
+            vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model")
             vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
         elif vision_config is None:
             vision_config = CONFIG_MAPPING["siglip_vision_model"](
@@ -73,7 +71,7 @@ def __init__(
         self.vision_config = vision_config
 
         if isinstance(text_config, dict):
-            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "cohere2"
+            text_config["model_type"] = text_config.get("model_type", "cohere2")
             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
         elif text_config is None:
             text_config = CONFIG_MAPPING["cohere2"](tie_word_embeddings=True)
@@ -66,7 +66,7 @@ class DeepseekVLImageProcessorFast(BaseImageProcessorFast):
 
     def __init__(self, **kwargs: Unpack[DeepseekVLFastImageProcessorKwargs]):
         super().__init__(**kwargs)
-        if kwargs.get("image_mean", None) is None:
+        if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
             background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
@@ -93,11 +93,11 @@ class DeepseekVLHybridImageProcessorFast(BaseImageProcessorFast):
     high_res_resample = PILImageResampling.BICUBIC
 
     def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]):
-        if kwargs.get("image_mean", None) is None:
+        if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
             background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
-        if kwargs.get("high_res_image_mean", None) is None:
+        if kwargs.get("high_res_image_mean") is None:
             high_res_background_color = (127, 127, 127)
         else:
             high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")])
@@ -749,11 +749,11 @@ class DeepseekVLHybridImageProcessorFast(DeepseekVLImageProcessorFast):
     high_res_resample = PILImageResampling.BICUBIC
 
     def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]):
-        if kwargs.get("image_mean", None) is None:
+        if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
             background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
-        if kwargs.get("high_res_image_mean", None) is None:
+        if kwargs.get("high_res_image_mean") is None:
             high_res_background_color = (127, 127, 127)
         else:
             high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")])
@@ -79,7 +79,7 @@ def rewrite_dict_keys(d):
     # (1) remove word breaking symbol, (2) add word ending symbol where the word is not broken up,
     # e.g.: d = {'le@@': 5, 'tt@@': 6, 'er': 7} => {'le': 5, 'tt': 6, 'er</w>': 7}
     d2 = dict((re.sub(r"@@$", "", k), v) if k.endswith("@@") else (re.sub(r"$", "</w>", k), v) for k, v in d.items())
-    keep_keys = "<s> <pad> </s> <unk>".split()
+    keep_keys = ["<s>", "<pad>", "</s>", "<unk>"]
     # restore the special tokens
     for k in keep_keys:
         del d2[f"{k}</w>"]
@@ -71,7 +71,7 @@ class JanusImageProcessorFast(BaseImageProcessorFast):
     valid_kwargs = JanusFastImageProcessorKwargs
 
     def __init__(self, **kwargs: Unpack[JanusFastImageProcessorKwargs]):
-        if kwargs.get("image_mean", None) is None:
+        if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
             background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
4 changes: 1 addition & 3 deletions src/transformers/models/jetmoe/modeling_jetmoe.py
@@ -846,9 +846,7 @@ def _init_weights(self, module):
             module.weight.data.fill_(1.0)
         elif isinstance(module, JetMoeParallelExperts):
             module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
-        elif isinstance(module, JetMoeMoA):
-            module.bias.data.zero_()
-        elif isinstance(module, JetMoeMoE):
+        elif isinstance(module, (JetMoeMoA, JetMoeMoE)):
             module.bias.data.zero_()
 
 
@@ -260,7 +260,7 @@ def __init__(
         self.disable_custom_kernels = disable_custom_kernels
         # Text backbone
         if isinstance(text_config, dict):
-            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert"
+            text_config["model_type"] = text_config.get("model_type", "bert")
             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
         elif text_config is None:
             text_config = CONFIG_MAPPING["bert"]()
@@ -268,7 +268,7 @@ def __init__(
         self.disable_custom_kernels = disable_custom_kernels
         # Text backbone
         if isinstance(text_config, dict):
-            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "bert"
+            text_config["model_type"] = text_config.get("model_type", "bert")
             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
         elif text_config is None:
             text_config = CONFIG_MAPPING["bert"]()
6 changes: 1 addition & 5 deletions src/transformers/models/oneformer/modeling_oneformer.py
@@ -2794,11 +2794,7 @@ def _init_weights(self, module: nn.Module):
             nn.init.constant_(module.output_proj.bias.data, 0.0)
         elif isinstance(module, OneFormerPixelDecoder):
             nn.init.normal_(module.level_embed, std=0)
-        elif isinstance(module, OneFormerTransformerDecoderLayer):
-            for p in module.parameters():
-                if p.dim() > 1:
-                    nn.init.xavier_uniform_(p, gain=xavier_std)
-        elif isinstance(module, OneFormerTransformerDecoderQueryTransformer):
+        elif isinstance(module, (OneFormerTransformerDecoderLayer, OneFormerTransformerDecoderQueryTransformer)):
             for p in module.parameters():
                 if p.dim() > 1:
                     nn.init.xavier_uniform_(p, gain=xavier_std)
2 changes: 1 addition & 1 deletion src/transformers/pipelines/fill_mask.py
@@ -197,7 +197,7 @@ def get_target_ids(self, targets, top_k=None):
             vocab = {}
         target_ids = []
         for target in targets:
-            id_ = vocab.get(target, None)
+            id_ = vocab.get(target)
             if id_ is None:
                 input_ids = self.tokenizer(
                     target,
8 changes: 5 additions & 3 deletions src/transformers/pipelines/token_classification.py
@@ -427,9 +427,11 @@ def aggregate_overlapping_entities(self, entities):
             if previous_entity["start"] <= entity["start"] < previous_entity["end"]:
                 current_length = entity["end"] - entity["start"]
                 previous_length = previous_entity["end"] - previous_entity["start"]
-                if current_length > previous_length:
-                    previous_entity = entity
-                elif current_length == previous_length and entity["score"] > previous_entity["score"]:
+                if (
+                    current_length > previous_length
+                    or current_length == previous_length
+                    and entity["score"] > previous_entity["score"]
+                ):
                     previous_entity = entity
             else:
                 aggregated_entities.append(previous_entity)
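Merging the `if`/`elif` into one condition relies on Python's precedence: `and` binds tighter than `or`, so `a or b and c` parses as `a or (b and c)`, which reproduces the original two branches exactly (the tokenization_utils.py change further down uses the same pattern). A quick exhaustive check over hypothetical lengths and scores, not taken from the PR:

    import itertools

    for cl, pl, cs, ps in itertools.product((1, 2), (1, 2), (0.3, 0.7), (0.3, 0.7)):
        old = (cl > pl) or (cl == pl and cs > ps)  # the original if/elif pair
        new = cl > pl or cl == pl and cs > ps      # the merged condition
        assert old == new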
4 changes: 1 addition & 3 deletions src/transformers/testing_utils.py
@@ -2622,9 +2622,7 @@ def nested_simplify(obj, decimals=3):
         return nested_simplify(obj.tolist())
     elif isinstance(obj, Mapping):
         return {nested_simplify(k, decimals): nested_simplify(v, decimals) for k, v in obj.items()}
-    elif isinstance(obj, (str, int, np.int64)):
-        return obj
-    elif obj is None:
+    elif isinstance(obj, (str, int, np.int64)) or obj is None:
         return obj
     elif is_torch_available() and isinstance(obj, torch.Tensor):
         return nested_simplify(obj.tolist(), decimals)
4 changes: 1 addition & 3 deletions src/transformers/tokenization_mistral_common.py
@@ -1784,9 +1784,7 @@ def from_pretrained(
             pathlib_repo_file = Path(path)
             file_name = pathlib_repo_file.name
             suffix = "".join(pathlib_repo_file.suffixes)
-            if file_name == "tekken.json":
-                valid_tokenizer_files.append(file_name)
-            elif suffix in sentencepiece_suffixes:
+            if file_name == "tekken.json" or suffix in sentencepiece_suffixes:
                 valid_tokenizer_files.append(file_name)
 
         if len(valid_tokenizer_files) == 0:
8 changes: 5 additions & 3 deletions src/transformers/tokenization_utils.py
@@ -877,9 +877,11 @@ def get_input_ids(text):
 
         input_ids = []
         for ids_or_pair_ids in batch_text_or_text_pairs:
-            if not isinstance(ids_or_pair_ids, (list, tuple)):
-                ids, pair_ids = ids_or_pair_ids, None
-            elif is_split_into_words and not isinstance(ids_or_pair_ids[0], (list, tuple)):
+            if (
+                not isinstance(ids_or_pair_ids, (list, tuple))
+                or is_split_into_words
+                and not isinstance(ids_or_pair_ids[0], (list, tuple))
+            ):
                 ids, pair_ids = ids_or_pair_ids, None
             else:
                 ids, pair_ids = ids_or_pair_ids
14 changes: 3 additions & 11 deletions src/transformers/trainer_pt_utils.py
@@ -153,9 +153,7 @@ def find_batch_size(tensors):
             result = find_batch_size(value)
             if result is not None:
                 return result
-    elif isinstance(tensors, torch.Tensor):
-        return tensors.shape[0] if len(tensors.shape) >= 1 else None
-    elif isinstance(tensors, np.ndarray):
+    elif isinstance(tensors, (torch.Tensor, np.ndarray)):
         return tensors.shape[0] if len(tensors.shape) >= 1 else None
 
 
@@ -634,10 +632,7 @@ def __init__(
         self.batch_size = batch_size
         if lengths is None:
             model_input_name = model_input_name if model_input_name is not None else "input_ids"
-            if (
-                not (isinstance(dataset[0], dict) or isinstance(dataset[0], BatchEncoding))
-                or model_input_name not in dataset[0]
-            ):
+            if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]:
                 raise ValueError(
                     "Can only automatically infer lengths for datasets whose items are dictionaries with an "
                     f"'{model_input_name}' key."
@@ -697,10 +692,7 @@ def __init__(
 
         if lengths is None:
            model_input_name = model_input_name if model_input_name is not None else "input_ids"
-            if (
-                not (isinstance(dataset[0], dict) or isinstance(dataset[0], BatchEncoding))
-                or model_input_name not in dataset[0]
-            ):
+            if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]:
                 raise ValueError(
                     "Can only automatically infer lengths for datasets whose items are dictionaries with an "
                     f"'{model_input_name}' key."
6 changes: 3 additions & 3 deletions src/transformers/utils/auto_docstring.py
@@ -1411,8 +1411,8 @@ def _process_regular_parameters(
                 param_type = f"[`{class_name}`]"
             else:
                 param_type = f"[`{param_type.split('.')[-1]}`]"
-        elif param_type == "" and False:  # TODO: Enforce typing for all parameters
-            print(f"🚨 {param_name} for {func.__qualname__} in file {func.__code__.co_filename} has no type")
+        # elif param_type == "" and False:  # TODO: Enforce typing for all parameters
+        #     print(f"🚨 {param_name} for {func.__qualname__} in file {func.__code__.co_filename} has no type")
Comment on lines +1414 to +1415:

Contributor: cc @yonigozlan to make sure it's a valid change

Member: Not sure what happened here, must have left this when debugging. We can leave it like that and I'll check in another PR if we should enable this back and make it a hard requirement (raise an error)
         param_type = param_type if "`" in param_type else f"`{param_type}`"
         # Format the parameter docstring
         if additional_info:
@@ -1840,7 +1840,7 @@ def auto_class_docstring(cls, custom_intro=None, custom_args=None, checkpoint=No
         docstring += set_min_indent(f"\n{docstring_init}", indent_level)
     elif is_dataclass:
         # No init function, we have a data class
-        docstring += "\nArgs:\n" if not docstring_args else docstring_args
+        docstring += docstring_args if docstring_args else "\nArgs:\n"
         source_args_dict = get_args_doc_from_source(ModelOutputArgs)
         doc_class = cls.__doc__ if cls.__doc__ else ""
         documented_kwargs, _ = parse_docstring(doc_class)
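For clarity on the first hunk above: the removed `elif` branch was already dead, since `param_type == "" and False` always evaluates falsy and the `print` could never run (the pattern ruff reports as SIM223), so commenting it out is behavior-preserving pending the follow-up the reviewer mentions. A standalone sketch:

    param_type = ""
    # `<expr> and False` always evaluates to a falsy value, so this branch is unreachable:
    if param_type == "" and False:
        raise AssertionError("never reached")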