
Commit a174672

Fix typoes in src
Signed-off-by: Yuanyuan Chen <[email protected]>
1 parent 6e69b60 commit a174672

22 files changed, +56 -56 lines changed

src/transformers/generation/continuous_batching/cache.py

Lines changed: 2 additions & 2 deletions
@@ -79,7 +79,7 @@ class PagedAttentionCache:
     layer group, and the shape of the cache tensor is `[num_blocks * block_size, num_heads, head_size]`.
 
     Grouping layers into groups is useful because when we allocate one block to a group N, the block allocated is the
-    same for all layers in group N, equivalently it is allocated accross all cache tensors. This allows us to
+    same for all layers in group N, equivalently it is allocated across all cache tensors. This allows us to
     efficiently allocate and free blocks, and to efficiently read and write key and value states.
 
     For instance, imagine we have 8 blocks of cache and a model with two layer groups: a full-attention group with 3
@@ -339,7 +339,7 @@ class PagedAttentionMemoryHandler:
     The memory footprint consists of three main components:
     - Cache memory: the space needed to store the cache tensors:
         2 * layer_group_size * [num_pages, page_size] * cache_dtype
-    - Activation memory: the space temporarly taken by the largest activation during the model forward pass:
+    - Activation memory: the space temporarily taken by the largest activation during the model forward pass:
        peak_activation_per_token * max_tokens_per_batch * activation_dtype_size
     - Static tensors: the space taken by the input/output buffers and metadata tensors for batch processing, sum of:
        - inputs_ids + outputs_ids + position_ids + logits_indices: 4 * max_tokens_per_batch * int32_size
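For context, the docstring fixed above sums three terms into one memory budget. A minimal sketch of that arithmetic, with hypothetical sizes (none of the numbers below come from the library):

# Illustrative memory-budget arithmetic for PagedAttentionMemoryHandler;
# every value here is a placeholder chosen only to show how the terms combine.
cache_dtype_size = 2                  # bytes per element for fp16/bf16
int32_size = 4
layer_group_size = 4                  # layers whose caches are allocated together
num_pages, page_size = 1024, 16384    # cache tensor is [num_pages, page_size]
max_tokens_per_batch = 2048
peak_activation_per_token = 8192      # elements, hypothetical

cache_bytes = 2 * layer_group_size * num_pages * page_size * cache_dtype_size  # K and V
activation_bytes = peak_activation_per_token * max_tokens_per_batch * cache_dtype_size
static_bytes = 4 * max_tokens_per_batch * int32_size  # ids, positions, logits indices

print(f"{(cache_bytes + activation_bytes + static_bytes) / 2**20:.1f} MiB")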

src/transformers/models/big_bird/modeling_big_bird.py

Lines changed: 4 additions & 4 deletions
@@ -1273,14 +1273,14 @@ def _get_single_block_row_attention(
         if block_id == to_end_block_id - 2:
             illegal_blocks.append(1)
 
-        selected_random_blokcs = []
+        selected_random_blocks = []
 
         for i in range(to_end_block_id - to_start_block_id):
             if perm_block[i] not in illegal_blocks:
-                selected_random_blokcs.append(perm_block[i])
-            if len(selected_random_blokcs) == num_rand_blocks:
+                selected_random_blocks.append(perm_block[i])
+            if len(selected_random_blocks) == num_rand_blocks:
                 break
-        return np.array(selected_random_blokcs, dtype=np.int32)
+        return np.array(selected_random_blocks, dtype=np.int32)
 
 
 # Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->BigBird
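The loop around the renamed variable keeps the first num_rand_blocks entries of a permutation that are not illegal. A standalone sketch of the same selection logic, with made-up inputs (the same code appears in bigbird_pegasus below):

import numpy as np

# Walk a permutation of candidate block ids and keep the first
# num_rand_blocks that are not in the illegal set.
perm_block = np.random.permutation(10)
illegal_blocks = [0, 1, 9]
num_rand_blocks = 3

selected_random_blocks = []
for block in perm_block:
    if block not in illegal_blocks:
        selected_random_blocks.append(block)
    if len(selected_random_blocks) == num_rand_blocks:
        break
result = np.array(selected_random_blocks, dtype=np.int32)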

src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py

Lines changed: 4 additions & 4 deletions
@@ -1088,14 +1088,14 @@ def _get_single_block_row_attention(
         if block_id == to_end_block_id - 2:
             illegal_blocks.append(1)
 
-        selected_random_blokcs = []
+        selected_random_blocks = []
 
         for i in range(to_end_block_id - to_start_block_id):
             if perm_block[i] not in illegal_blocks:
-                selected_random_blokcs.append(perm_block[i])
-            if len(selected_random_blokcs) == num_rand_blocks:
+                selected_random_blocks.append(perm_block[i])
+            if len(selected_random_blocks) == num_rand_blocks:
                 break
-        return np.array(selected_random_blokcs, dtype=np.int32)
+        return np.array(selected_random_blocks, dtype=np.int32)
 
 
 class BigBirdPegasusEncoderAttention(nn.Module):

src/transformers/models/cpmant/modeling_cpmant.py

Lines changed: 6 additions & 6 deletions
@@ -352,7 +352,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         past_key_values: Optional[Cache] = None,
         use_cache: Optional[bool] = None,
-        cache_postion: Optional[torch.Tensor] = None,
+        cache_position: Optional[torch.Tensor] = None,
     ):
         """
         Args:
@@ -493,16 +493,16 @@ def _position_bucket(self, relative_position, num_buckets=32, max_distance=128):
         relative_position = torch.abs(relative_position)
         max_exact = num_buckets // 2
         is_small = relative_position < max_exact
-        relative_postion_if_large = max_exact + (
+        relative_position_if_large = max_exact + (
             torch.log(relative_position.float() / max_exact)
             / math.log(max_distance / max_exact)
             * (num_buckets - max_exact)
         ).to(torch.int32)
-        relative_postion_if_large = torch.min(
-            relative_postion_if_large,
-            torch.full_like(relative_postion_if_large, num_buckets - 1),
+        relative_position_if_large = torch.min(
+            relative_position_if_large,
+            torch.full_like(relative_position_if_large, num_buckets - 1),
         )
-        relative_buckets += torch.where(is_small, relative_position.to(torch.int32), relative_postion_if_large)
+        relative_buckets += torch.where(is_small, relative_position.to(torch.int32), relative_position_if_large)
         return relative_buckets
 
 
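The renamed variable implements logarithmic position bucketing: distances below max_exact keep their own bucket, larger ones are spread logarithmically up to max_distance and clamped. A worked sketch of that branch with the default num_buckets=32 and max_distance=128 (the sample distances are arbitrary):

import math

# Large-distance branch of the bucketing above; int() truncates like .to(torch.int32).
num_buckets, max_distance = 32, 128
max_exact = num_buckets // 2  # 16: distances 0..15 keep their own bucket

for rp in (3, 16, 64, 500):
    if rp < max_exact:
        bucket = rp
    else:
        bucket = max_exact + int(
            math.log(rp / max_exact) / math.log(max_distance / max_exact) * (num_buckets - max_exact)
        )
        bucket = min(bucket, num_buckets - 1)  # the torch.min clamp above
    print(rp, "->", bucket)  # 3 -> 3, 16 -> 16, 64 -> 26, 500 -> 31
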
src/transformers/models/gemma3/convert_gemma3_weights.py

Lines changed: 3 additions & 3 deletions
@@ -439,9 +439,9 @@ def convert_transformer_weights(
         decoder_block_start = path.find(_TRANSFORMER_DECODER_BLOCK)
         decoder_block_offset = decoder_block_start + _TRANSFORMER_DECODER_BLOCK_LEN
         decoder_block_path = path[decoder_block_offset:]
-        next_path_seperator_idx = decoder_block_path.find("/")
-        layer_idx = decoder_block_path[:next_path_seperator_idx]
-        decoder_block_path = decoder_block_path[next_path_seperator_idx:]
+        next_path_separator_idx = decoder_block_path.find("/")
+        layer_idx = decoder_block_path[:next_path_separator_idx]
+        decoder_block_path = decoder_block_path[next_path_separator_idx:]
 
         base_path = f"language_model.model.layers.{layer_idx}"
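The renamed index is simply the position of the next "/" in the remaining checkpoint path, with the layer number sitting before it. A toy illustration (the path fragment is invented, not taken from a real Gemma 3 checkpoint):

# Hypothetical path fragment, only to illustrate the slicing above.
decoder_block_path = "12/attn/q_einsum/w"

next_path_separator_idx = decoder_block_path.find("/")             # 2
layer_idx = decoder_block_path[:next_path_separator_idx]           # "12"
decoder_block_path = decoder_block_path[next_path_separator_idx:]  # "/attn/q_einsum/w"

base_path = f"language_model.model.layers.{layer_idx}"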

src/transformers/models/git/modeling_git.py

Lines changed: 2 additions & 2 deletions
@@ -955,7 +955,7 @@ def __init__(self, config):
         self.visual_projection = GitProjection(config)
 
         if config.num_image_with_embedding is not None:
-            self.img_temperal_embedding = nn.ParameterList(
+            self.img_temporal_embedding = nn.ParameterList(
                 nn.Parameter(torch.zeros(1, 1, config.vision_config.hidden_size))
                 for _ in range(config.num_image_with_embedding)
             )
@@ -1120,7 +1120,7 @@ def forward(
                 visual_features_frame = self.image_encoder(
                     pixel_values[:, frame_idx, :, :], interpolate_pos_encoding=interpolate_pos_encoding
                 ).last_hidden_state
-                visual_features_frame += self.img_temperal_embedding[frame_idx]
+                visual_features_frame += self.img_temporal_embedding[frame_idx]
                 visual_features.append(visual_features_frame)
 
             # finally, concatenate all features along sequence dimension
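The renamed attribute holds one learnable (1, 1, hidden_size) embedding per video frame, broadcast-added to that frame's features before concatenation. A minimal sketch of the pattern with placeholder sizes:

import torch
from torch import nn

# One learnable (1, 1, hidden) bias per frame, as in img_temporal_embedding above.
num_frames, hidden = 6, 768
img_temporal_embedding = nn.ParameterList(
    nn.Parameter(torch.zeros(1, 1, hidden)) for _ in range(num_frames)
)

features = [torch.randn(2, 197, hidden) for _ in range(num_frames)]  # per-frame encoder outputs
features = [f + img_temporal_embedding[i] for i, f in enumerate(features)]
combined = torch.cat(features, dim=1)  # concatenate along the sequence dimension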

src/transformers/models/groupvit/modeling_groupvit.py

Lines changed: 2 additions & 2 deletions
@@ -75,7 +75,7 @@ def gumbel_softmax(logits: torch.Tensor, tau: float = 1, hard: bool = False, dim
         y_hard = torch.zeros_like(logits, memory_format=torch.legacy_contiguous_format).scatter_(dim, index, 1.0)
         ret = y_hard - y_soft.detach() + y_soft
     else:
-        # Reparametrization trick.
+        # Reparameterization trick.
         ret = y_soft
     return ret
 
@@ -663,7 +663,7 @@ def forward(
         attn_weights = nn.functional.softmax(attn_weights, dim=-1)
 
         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
             # In order to do so, attn_weights have to reshaped
             # twice and have to be reused in the following
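The corrected comment in the first hunk sits next to the straight-through estimator: when hard=True, the forward value is the one-hot y_hard while gradients flow through the soft sample. A compact sketch of that identity (Gumbel noise omitted for brevity):

import torch

# Straight-through trick: forward value equals y_hard, gradient equals y_soft's.
logits = torch.randn(4, 10, requires_grad=True)
y_soft = torch.softmax(logits, dim=-1)
index = y_soft.argmax(dim=-1, keepdim=True)
y_hard = torch.zeros_like(logits).scatter_(-1, index, 1.0)
ret = y_hard - y_soft.detach() + y_soft  # value == y_hard, d(ret)/d(logits) == d(y_soft)/d(logits)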

src/transformers/models/imagegpt/image_processing_imagegpt.py

Lines changed: 1 addition & 1 deletion
@@ -247,7 +247,7 @@ def preprocess(
         )
 
         # Here, normalize() is using a constant factor to divide pixel values.
-        # hence, the method does not need iamge_mean and image_std.
+        # hence, the method does not need image_mean and image_std.
         validate_preprocess_arguments(
             do_resize=do_resize,
             size=size,
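The comment fixed here refers to a fixed-constant normalization that needs no per-channel statistics. A sketch of the usual [0, 255] to [-1, 1] mapping (the 127.5 divisor is assumed for illustration, not quoted from the file):

import numpy as np

# Constant-factor normalization: no image_mean / image_std required.
image = np.random.randint(0, 256, size=(32, 32, 3)).astype(np.float32)
normalized = image / 127.5 - 1.0  # pixel range [0, 255] -> [-1, 1]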

src/transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@
 # Similar to transformers.models.pix2struct.image_processing_pix2struct.torch_extract_patches but dealing with a batch of images directly.
 def torch_extract_patches(image_tensor, patch_height, patch_width):
     """
-    Utiliy function to extract patches from a given tensor representing a batch of images. Returns a tensor of shape
+    Utility function to extract patches from a given tensor representing a batch of images. Returns a tensor of shape
     (batch_size, `rows`, `columns`, `num_channels` x `patch_height` x `patch_width`).
 
     Args:
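The docstring being fixed pins down the output layout. One way to produce that shape is tensor.unfold over height and width; a hedged sketch under that assumption, not necessarily the implementation in the file:

import torch

# Extract non-overlapping patches from a batch of images and arrange them as
# (batch_size, rows, columns, channels * patch_height * patch_width).
def extract_patches_sketch(images, patch_height, patch_width):
    b, c, h, w = images.shape
    patches = images.unfold(2, patch_height, patch_height).unfold(3, patch_width, patch_width)
    # patches: (b, c, rows, cols, patch_height, patch_width)
    return patches.permute(0, 2, 3, 1, 4, 5).reshape(
        b, h // patch_height, w // patch_width, c * patch_height * patch_width
    )

out = extract_patches_sketch(torch.randn(2, 3, 224, 224), 16, 16)  # -> (2, 14, 14, 768)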

src/transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py

Lines changed: 1 addition & 1 deletion
@@ -204,7 +204,7 @@ def __call__(
         if padding:
             padded_inputs["padding_mask"] = padded_inputs.pop("attention_mask")
 
-        # now let's padd left and right
+        # now let's pad left and right
        pad_left = int(self.audio_silence_prefix_seconds * self.sampling_rate)
        pad_right = int((self.audio_delay_seconds + 1.0) * self.sampling_rate)
        padded_inputs["input_values"] = np.pad(
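The corrected comment sits above the silence-padding arithmetic. With a 24 kHz rate, a 1-second prefix, and a 0.5-second delay (all placeholder values, not the feature extractor's real config), the numbers work out as follows:

import numpy as np

# Hypothetical values; the real ones come from the feature extractor config.
sampling_rate = 24000
audio_silence_prefix_seconds = 1.0
audio_delay_seconds = 0.5

pad_left = int(audio_silence_prefix_seconds * sampling_rate)   # 24000
pad_right = int((audio_delay_seconds + 1.0) * sampling_rate)   # 36000

audio = np.zeros((1, 16000), dtype=np.float32)
padded = np.pad(audio, ((0, 0), (pad_left, pad_right)))        # shape (1, 76000)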
