2 changes: 1 addition & 1 deletion src/transformers/debug_utils.py
@@ -153,7 +153,7 @@ def __init__(self, model, max_frames_to_save=21, trace_batch_nums=[], abort_afte
         self.batch_number = 0
         self.total_calls = 0
         self.detected_overflow = False
-        self.prefix = "                 "
+        self.prefix = " " * 17
 
         self.analyse_model()
 
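The only change in this file swaps a hard-coded run of spaces for the equivalent expression `" " * 17`. Assuming the original literal contained exactly 17 spaces, this is a pure readability refactor, as the quick standalone check below illustrates (not part of the PR):

# Standalone check: " " * 17 builds the same 17-character padding string
# that a literal of 17 spaces would (assumption: the old literal had 17 spaces).
old_prefix = "                 "  # 17-space literal
new_prefix = " " * 17
assert old_prefix == new_prefix and len(new_prefix) == 17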
4 changes: 4 additions & 0 deletions src/transformers/image_processing_utils.py
@@ -304,6 +304,10 @@ def get_patch_output_size(image, target_resolution, input_data_format):
     original_height, original_width = get_image_size(image, channel_dim=input_data_format)
     target_height, target_width = target_resolution
 
+    if original_width == 0:
+        raise ValueError("original_width can not be 0")
+    if original_height == 0:
+        raise ValueError("original_height can not be 0")
     scale_w = target_width / original_width
     scale_h = target_height / original_height
 
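For context, here is a minimal standalone sketch of the guarded scale computation, not the library function itself; the aspect-ratio-preserving continuation after the two scale factors is an assumption for illustration only:

import math

def patch_output_size_sketch(original_size, target_resolution):
    # Mirrors the guarded part of get_patch_output_size: a zero-sized input
    # now fails with an explicit ValueError instead of a ZeroDivisionError.
    original_height, original_width = original_size
    target_height, target_width = target_resolution

    if original_width == 0:
        raise ValueError("original_width can not be 0")
    if original_height == 0:
        raise ValueError("original_height can not be 0")

    scale_w = target_width / original_width
    scale_h = target_height / original_height

    # Assumed continuation: fit inside the target while preserving aspect ratio.
    if scale_w < scale_h:
        return min(math.ceil(original_height * scale_w), target_height), target_width
    return target_height, min(math.ceil(original_width * scale_h), target_width)

print(patch_output_size_sketch((336, 672), (336, 336)))   # (168, 336)
# patch_output_size_sketch((0, 672), (336, 336))          # -> ValueError, not ZeroDivisionError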
23 changes: 12 additions & 11 deletions src/transformers/masking_utils.py
@@ -201,18 +201,19 @@ def prepare_padding_mask(
     From the 2D attention mask, prepare the correct padding mask to use by potentially padding it, and slicing
     according to the `kv_offset` if `_slice` is `True`.
     """
+    if attention_mask is None:
+        return None
     local_padding_mask = attention_mask
-    if attention_mask is not None:
-        # Pad it if necessary
-        if (padding_length := kv_length + kv_offset - attention_mask.shape[-1]) > 0:
-            local_padding_mask = torch.nn.functional.pad(attention_mask, (0, padding_length))
-        # For flex, we should not slice them, only use an offset
-        if _slice:
-            # Equivalent to: `local_padding_mask = attention_mask[:, kv_offset : kv_offset + kv_length]`,
-            # but without data-dependent slicing (i.e. torch.compile friendly)
-            mask_indices = torch.arange(kv_length, device=local_padding_mask.device)
-            mask_indices += kv_offset
-            local_padding_mask = local_padding_mask[:, mask_indices]
+    # Pad it if necessary
+    if (padding_length := kv_length + kv_offset - attention_mask.shape[-1]) > 0:
+        local_padding_mask = torch.nn.functional.pad(attention_mask, (0, padding_length))
+    # For flex, we should not slice them, only use an offset
+    if _slice:
+        # Equivalent to: `local_padding_mask = attention_mask[:, kv_offset : kv_offset + kv_length]`,
+        # but without data-dependent slicing (i.e. torch.compile friendly)
+        mask_indices = torch.arange(kv_length, device=local_padding_mask.device)
+        mask_indices += kv_offset
+        local_padding_mask = local_padding_mask[:, mask_indices]
     return local_padding_mask


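The rewrite replaces the `if attention_mask is not None:` wrapper with an early `return None`, de-indenting the body without changing behaviour. The index-based gather it keeps is equivalent to the plain slice named in the comment, but avoids data-dependent slicing so it stays torch.compile friendly. A small self-contained check with assumed shapes (not part of the PR):

import torch

# Assumed shapes for illustration: batch of 2, padding mask over 10 kv positions.
attention_mask = torch.ones(2, 10, dtype=torch.bool)
kv_offset, kv_length = 3, 4

# Compile-friendly gather used by prepare_padding_mask ...
mask_indices = torch.arange(kv_length, device=attention_mask.device) + kv_offset
gathered = attention_mask[:, mask_indices]

# ... matches the data-dependent slice it stands in for.
assert torch.equal(gathered, attention_mask[:, kv_offset : kv_offset + kv_length])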