Commit acf5d39

cyyever authored and vijayabhaskar-ev committed
Fix typos in src and tests (huggingface#40845)
Signed-off-by: Yuanyuan Chen <[email protected]>
1 parent b37bf94 commit acf5d39

File tree

44 files changed: +116, -98 lines changed


src/transformers/generation/continuous_batching/cache.py

Lines changed: 2 additions & 2 deletions
@@ -79,7 +79,7 @@ class PagedAttentionCache:
     layer group, and the shape of the cache tensor is `[num_blocks * block_size, num_heads, head_size]`.

     Grouping layers into groups is useful because when we allocate one block to a group N, the block allocated is the
-    same for all layers in group N, equivalently it is allocated accross all cache tensors. This allows us to
+    same for all layers in group N, equivalently it is allocated across all cache tensors. This allows us to
     efficiently allocate and free blocks, and to efficiently read and write key and value states.

     For instance, imagine we have 8 blocks of cache and a model with two layer groups: a full-attention group with 3
@@ -349,7 +349,7 @@ class PagedAttentionMemoryHandler:
     The memory footprint consists of three main components:
     - Cache memory: the space needed to store the cache tensors:
         2 * layer_group_size * [num_pages, page_size] * cache_dtype
-    - Activation memory: the space temporarly taken by the largest activation during the model forward pass:
+    - Activation memory: the space temporarily taken by the largest activation during the model forward pass:
         peak_activation_per_token * max_tokens_per_batch * activation_dtype_size
     - Static tensors: the space taken by the input/output buffers and metadata tensors for batch processing, sum of:
         - inputs_ids + outputs_ids + position_ids + logits_indices: 4 * max_tokens_per_batch * int32_size
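
Aside: the cache component of the docstring corrected above can be sanity-checked with simple arithmetic. The sketch below is illustrative only; every size in it is a made-up assumption, not a value taken from the library.

# Rough cache-memory estimate for a paged KV cache; every number here is a
# hypothetical placeholder, not a value read from transformers.
num_pages = 1024        # blocks available in the cache
page_size = 256         # tokens stored per block
num_heads = 8           # key/value heads per layer
head_size = 128         # dimension of each head
layer_group_size = 24   # layers whose blocks are allocated together
dtype_size = 2          # bytes per element, e.g. float16

# The factor 2 accounts for keys and values, matching the docstring's formula.
cache_bytes = 2 * layer_group_size * num_pages * page_size * num_heads * head_size * dtype_size
print(f"~{cache_bytes / 1024**3:.1f} GiB")  # ~24.0 GiB with these placeholder sizes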

src/transformers/models/big_bird/modeling_big_bird.py

Lines changed: 4 additions & 4 deletions
@@ -1108,14 +1108,14 @@ def _get_single_block_row_attention(
         if block_id == to_end_block_id - 2:
             illegal_blocks.append(1)

-        selected_random_blokcs = []
+        selected_random_blocks = []

         for i in range(to_end_block_id - to_start_block_id):
             if perm_block[i] not in illegal_blocks:
-                selected_random_blokcs.append(perm_block[i])
-                if len(selected_random_blokcs) == num_rand_blocks:
+                selected_random_blocks.append(perm_block[i])
+                if len(selected_random_blocks) == num_rand_blocks:
                     break
-        return np.array(selected_random_blokcs, dtype=np.int32)
+        return np.array(selected_random_blocks, dtype=np.int32)


 # Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->BigBird
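
For context, the loop being renamed here walks a random permutation of block indices and keeps the first `num_rand_blocks` entries that are not in `illegal_blocks`. A standalone toy run of the same logic, with inputs invented purely for illustration, looks like this:

import numpy as np

# Toy inputs, not taken from the model: a permutation of 8 block indices and
# the blocks that must never be selected as random attention targets.
perm_block = np.random.permutation(8)
illegal_blocks = {0, 1, 7}
num_rand_blocks = 3

selected_random_blocks = []
for block in perm_block:
    if block not in illegal_blocks:
        selected_random_blocks.append(block)
    if len(selected_random_blocks) == num_rand_blocks:
        break

print(np.array(selected_random_blocks, dtype=np.int32))  # e.g. [5 2 4]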

src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py

Lines changed: 4 additions & 4 deletions
@@ -1086,14 +1086,14 @@ def _get_single_block_row_attention(
         if block_id == to_end_block_id - 2:
             illegal_blocks.append(1)

-        selected_random_blokcs = []
+        selected_random_blocks = []

         for i in range(to_end_block_id - to_start_block_id):
             if perm_block[i] not in illegal_blocks:
-                selected_random_blokcs.append(perm_block[i])
-                if len(selected_random_blokcs) == num_rand_blocks:
+                selected_random_blocks.append(perm_block[i])
+                if len(selected_random_blocks) == num_rand_blocks:
                     break
-        return np.array(selected_random_blokcs, dtype=np.int32)
+        return np.array(selected_random_blocks, dtype=np.int32)


 class BigBirdPegasusEncoderAttention(nn.Module):

src/transformers/models/cpmant/modeling_cpmant.py

Lines changed: 6 additions & 6 deletions
@@ -351,7 +351,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         past_key_values: Optional[Cache] = None,
         use_cache: Optional[bool] = None,
-        cache_postion: Optional[torch.Tensor] = None,
+        cache_position: Optional[torch.Tensor] = None,
     ):
         """
         Args:
@@ -492,16 +492,16 @@ def _position_bucket(self, relative_position, num_buckets=32, max_distance=128):
         relative_position = torch.abs(relative_position)
         max_exact = num_buckets // 2
         is_small = relative_position < max_exact
-        relative_postion_if_large = max_exact + (
+        relative_position_if_large = max_exact + (
             torch.log(relative_position.float() / max_exact)
             / math.log(max_distance / max_exact)
             * (num_buckets - max_exact)
         ).to(torch.int32)
-        relative_postion_if_large = torch.min(
-            relative_postion_if_large,
-            torch.full_like(relative_postion_if_large, num_buckets - 1),
+        relative_position_if_large = torch.min(
+            relative_position_if_large,
+            torch.full_like(relative_position_if_large, num_buckets - 1),
         )
-        relative_buckets += torch.where(is_small, relative_position.to(torch.int32), relative_postion_if_large)
+        relative_buckets += torch.where(is_small, relative_position.to(torch.int32), relative_position_if_large)
         return relative_buckets

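For reference, the renamed `relative_position_if_large` implements T5-style log-spaced bucketing: distances below `max_exact` keep their exact bucket, larger ones are spread logarithmically over the remaining buckets. A minimal standalone check of just that formula (it ignores the sign/direction handling the model does earlier, and assumes the same defaults of 32 buckets and a max distance of 128):

import math
import torch

def log_bucket(relative_position, num_buckets=32, max_distance=128):
    # Mirrors the corrected lines: small distances keep their exact bucket,
    # large ones are mapped logarithmically into the remaining buckets.
    relative_position = torch.abs(relative_position)
    max_exact = num_buckets // 2
    is_small = relative_position < max_exact
    relative_position_if_large = max_exact + (
        torch.log(relative_position.float() / max_exact)
        / math.log(max_distance / max_exact)
        * (num_buckets - max_exact)
    ).to(torch.int32)
    relative_position_if_large = torch.min(
        relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1)
    )
    return torch.where(is_small, relative_position.to(torch.int32), relative_position_if_large)

print(log_bucket(torch.tensor([1, 15, 16, 64, 500])))  # tensor([ 1, 15, 16, 26, 31], dtype=torch.int32)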

src/transformers/models/gemma3/convert_gemma3_weights.py

Lines changed: 3 additions & 3 deletions
@@ -439,9 +439,9 @@ def convert_transformer_weights(
         decoder_block_start = path.find(_TRANSFORMER_DECODER_BLOCK)
         decoder_block_offset = decoder_block_start + _TRANSFORMER_DECODER_BLOCK_LEN
         decoder_block_path = path[decoder_block_offset:]
-        next_path_seperator_idx = decoder_block_path.find("/")
-        layer_idx = decoder_block_path[:next_path_seperator_idx]
-        decoder_block_path = decoder_block_path[next_path_seperator_idx:]
+        next_path_separator_idx = decoder_block_path.find("/")
+        layer_idx = decoder_block_path[:next_path_separator_idx]
+        decoder_block_path = decoder_block_path[next_path_separator_idx:]

         base_path = f"language_model.model.layers.{layer_idx}"

src/transformers/models/git/modeling_git.py

Lines changed: 2 additions & 2 deletions
@@ -950,7 +950,7 @@ def __init__(self, config):
         self.visual_projection = GitProjection(config)

         if config.num_image_with_embedding is not None:
-            self.img_temperal_embedding = nn.ParameterList(
+            self.img_temporal_embedding = nn.ParameterList(
                 nn.Parameter(torch.zeros(1, 1, config.vision_config.hidden_size))
                 for _ in range(config.num_image_with_embedding)
             )
@@ -1115,7 +1115,7 @@ def forward(
                 visual_features_frame = self.image_encoder(
                     pixel_values[:, frame_idx, :, :], interpolate_pos_encoding=interpolate_pos_encoding
                 ).last_hidden_state
-                visual_features_frame += self.img_temporal_embedding[frame_idx]
+                visual_features_frame += self.img_temporal_embedding[frame_idx]
                 visual_features.append(visual_features_frame)

             # finally, concatenate all features along sequence dimension
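
The renamed `img_temporal_embedding` is simply one learnable bias per video frame, added to that frame's visual features. Stripped of the surrounding model, the pattern looks roughly like the sketch below; the hidden size, frame count, and tensor shapes are invented for the example.

import torch
from torch import nn

num_frames, hidden_size = 6, 768  # illustrative values only

# One learnable (1, 1, hidden_size) parameter per frame, as in the fixed attribute.
img_temporal_embedding = nn.ParameterList(
    nn.Parameter(torch.zeros(1, 1, hidden_size)) for _ in range(num_frames)
)

frame_features = torch.randn(2, 197, hidden_size)  # (batch, patches, hidden) for one frame
frame_idx = 3
frame_features = frame_features + img_temporal_embedding[frame_idx]  # broadcast add
print(frame_features.shape)  # torch.Size([2, 197, 768])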

src/transformers/models/groupvit/modeling_groupvit.py

Lines changed: 2 additions & 2 deletions
@@ -74,7 +74,7 @@ def gumbel_softmax(logits: torch.Tensor, tau: float = 1, hard: bool = False, dim
         y_hard = torch.zeros_like(logits, memory_format=torch.legacy_contiguous_format).scatter_(dim, index, 1.0)
         ret = y_hard - y_soft.detach() + y_soft
     else:
-        # Reparametrization trick.
+        # Reparameterization trick.
         ret = y_soft
     return ret

@@ -662,7 +662,7 @@ def forward(
         attn_weights = nn.functional.softmax(attn_weights, dim=-1)

         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
             # In order to do so, attn_weights have to reshaped
             # twice and have to be reused in the following
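
The comment fixed in the first hunk refers to the straight-through behaviour of hard Gumbel-Softmax: the forward pass uses the one-hot `y_hard`, while gradients flow through the soft, reparameterized `y_soft`. PyTorch exposes the same behaviour in `torch.nn.functional.gumbel_softmax`, so a quick illustration (toy shapes and an arbitrary loss, not code from the model) is:

import torch
import torch.nn.functional as F

logits = torch.randn(2, 4, requires_grad=True)  # arbitrary toy shape

# hard=True returns one-hot samples in the forward pass but keeps the
# soft (reparameterized) probabilities in the backward pass.
y = F.gumbel_softmax(logits, tau=1.0, hard=True, dim=-1)
print(y)  # one-hot rows

loss = (y * torch.arange(4.0)).sum()  # arbitrary loss so the gradients are informative
loss.backward()
print(logits.grad)  # non-zero thanks to the straight-through estimator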

src/transformers/models/imagegpt/image_processing_imagegpt.py

Lines changed: 1 addition & 1 deletion
@@ -242,7 +242,7 @@ def preprocess(
             raise ValueError("Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, or torch.Tensor")

         # Here, normalize() is using a constant factor to divide pixel values.
-        # hence, the method does not need iamge_mean and image_std.
+        # hence, the method does not need image_mean and image_std.
         validate_preprocess_arguments(
             do_resize=do_resize,
             size=size,

src/transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@
 # Similar to transformers.models.pix2struct.image_processing_pix2struct.torch_extract_patches but dealing with a batch of images directly.
 def torch_extract_patches(image_tensor, patch_height, patch_width):
     """
-    Utiliy function to extract patches from a given tensor representing a batch of images. Returns a tensor of shape
+    Utility function to extract patches from a given tensor representing a batch of images. Returns a tensor of shape
     (batch_size, `rows`, `columns`, `num_channels` x `patch_height` x `patch_width`).

     Args:
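
For orientation, this kind of patch extraction can be reproduced with `torch.Tensor.unfold`. The sketch below is a standalone approximation, not the library's implementation; it assumes image dimensions divisible by the patch size and produces the layout the docstring describes.

import torch

def extract_patches(images: torch.Tensor, patch_height: int, patch_width: int) -> torch.Tensor:
    # images: (batch, channels, height, width); dimensions assumed divisible by the patch size.
    batch, channels, height, width = images.shape
    patches = images.unfold(2, patch_height, patch_height).unfold(3, patch_width, patch_width)
    # patches: (batch, channels, rows, columns, patch_height, patch_width)
    patches = patches.permute(0, 2, 3, 1, 4, 5).contiguous()
    return patches.reshape(batch, height // patch_height, width // patch_width, -1)

out = extract_patches(torch.randn(2, 3, 64, 48), 16, 16)
print(out.shape)  # torch.Size([2, 4, 3, 768])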

src/transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py

Lines changed: 1 addition & 1 deletion
@@ -203,7 +203,7 @@ def __call__(
         if padding:
             padded_inputs["padding_mask"] = padded_inputs.pop("attention_mask")

-        # now let's padd left and right
+        # now let's pad left and right
         pad_left = int(self.audio_silence_prefix_seconds * self.sampling_rate)
         pad_right = int((self.audio_delay_seconds + 1.0) * self.sampling_rate)
         padded_inputs["input_values"] = np.pad(
