Commit 1ce12ac

Replace Optional and Union typing with | in some source files (#42372)

Signed-off-by: Yuanyuan Chen <[email protected]>
Co-authored-by: Matt <[email protected]>

1 parent: f13b100

20 files changed: +451, -474 lines
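
The pattern applied throughout this commit is PEP 604 union syntax, available since Python 3.10: Optional[X] becomes X | None, Union[X, Y] becomes X | Y, and the now-unused typing imports are dropped. A minimal before/after sketch (the resolve() helper is hypothetical, purely for illustration):

    # Before: needs `from typing import Optional, Union`
    # def resolve(key: Union[str, int], default: Optional[str] = None) -> Optional[str]: ...

    # After: the same signature with builtin `|` unions, no typing import required
    def resolve(key: str | int, default: str | None = None) -> str | None:
        # Hypothetical helper, not part of this commit; it only shows the rewrite.
        return default if isinstance(key, int) else key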

src/transformers/integrations/accelerate.py

Lines changed: 10 additions & 10 deletions

@@ -22,7 +22,7 @@
 import re
 from collections import OrderedDict, defaultdict
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING

 from safetensors import safe_open
 from safetensors.torch import save_file
@@ -550,14 +550,14 @@ def offload_weight(weight: torch.Tensor, weight_name: str, offload_folder: str |

 def _init_infer_auto_device_map(
     model: nn.Module,
-    max_memory: Optional[dict[Union[int, str], Union[int, str]]] = None,
-    no_split_module_classes: Optional[list[str]] = None,
-    tied_parameters: Optional[list[list[str]]] = None,
+    max_memory: dict[int | str, int | str] | None = None,
+    no_split_module_classes: list[str] | None = None,
+    tied_parameters: list[list[str]] | None = None,
     hf_quantizer: "HfQuantizer | None" = None,
 ) -> tuple[
-    list[Union[int, str]],
-    dict[Union[int, str], Union[int, str]],
-    list[Union[int, str]],
+    list[int | str],
+    dict[int | str, int | str],
+    list[int | str],
     list[int],
     dict[str, int],
     list[list[str]],
@@ -620,12 +620,12 @@ def _init_infer_auto_device_map(

 def infer_auto_device_map(
     model: nn.Module,
-    max_memory: Optional[dict[Union[int, str], Union[int, str]]] = None,
-    no_split_module_classes: Optional[list[str]] = None,
+    max_memory: dict[int | str, int | str] | None = None,
+    no_split_module_classes: list[str] | None = None,
     verbose: bool = False,
     clean_result: bool = True,
     offload_buffers: bool = False,
-    tied_parameters: Optional[list[list[str]]] = None,
+    tied_parameters: list[list[str]] | None = None,
     hf_quantizer: "HfQuantizer | None" = None,
 ):
     """

src/transformers/integrations/bitsandbytes.py

Lines changed: 2 additions & 3 deletions

@@ -1,7 +1,6 @@
 import inspect
 from collections import defaultdict
 from inspect import signature
-from typing import Optional

 from ..core_model_loading import ConversionOps
 from ..quantizers.quantizers_utils import get_module_from_name
@@ -38,7 +37,7 @@ def __init__(self, hf_quantizer):
     def convert(
         self,
         input_dict: dict[str, list[torch.Tensor]],
-        model: Optional[torch.nn.Module] = None,
+        model: torch.nn.Module | None = None,
         missing_keys=None,
         **kwargs,
     ) -> dict[str, torch.Tensor]:
@@ -95,7 +94,7 @@ def __init__(self, hf_quantizer):
     def convert(
         self,
         input_dict: dict[str, list[torch.Tensor]],
-        model: Optional[torch.nn.Module] = None,
+        model: torch.nn.Module | None = None,
         missing_keys=None,
         **kwargs,
     ) -> dict[str, torch.Tensor]:

src/transformers/integrations/finegrained_fp8.py

Lines changed: 3 additions & 3 deletions

@@ -15,7 +15,7 @@

 import re
 from collections.abc import Sequence
-from typing import Any, Optional, Union
+from typing import Any

 from ..core_model_loading import ConversionOps
 from ..utils import is_accelerate_available, is_torch_accelerator_available, is_torch_available, logging
@@ -655,13 +655,13 @@ def convert(self, input_dict: torch.Tensor, **kwargs) -> dict[str, torch.Tensor]
 class Fp8Dequantize(ConversionOps):
     """Inverse operation of :class:`Fp8Quantize`. Takes a pair (weight, scale) and reconstructs the fp32 tensor."""

-    def __init__(self, block_size: Optional[tuple[int, int]] = None):
+    def __init__(self, block_size: tuple[int, int] | None = None):
         self.block_size = block_size
         self.reverse_op = Fp8Quantize

     def convert(
         self,
-        value: Union[Sequence[torch.Tensor], dict[str, torch.Tensor]],
+        value: Sequence[torch.Tensor] | dict[str, torch.Tensor],
         *,
         context: dict[str, Any],
     ) -> torch.Tensor:
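
A side benefit of `|` unions, relevant to signatures like convert() above that accept Sequence[torch.Tensor] | dict[str, torch.Tensor]: since Python 3.10, unions of plain classes are also valid isinstance() targets. A small sketch (normalize() is hypothetical):

    from collections.abc import Sequence

    def normalize(value: Sequence[float] | dict[str, float]) -> list[float]:
        # `X | Y` unions of plain classes work with isinstance() in 3.10+
        # (subscripted generics such as dict[str, float] do not).
        if not isinstance(value, Sequence | dict):
            raise TypeError(f"unsupported type: {type(value).__name__}")
        if isinstance(value, dict):
            return list(value.values())
        return list(value)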

src/transformers/integrations/tensor_parallel.py

Lines changed: 1 addition & 2 deletions

@@ -18,7 +18,6 @@
 import os
 import re
 from functools import partial, reduce
-from typing import Optional

 from ..utils.import_utils import is_torch_available

@@ -322,7 +321,7 @@ def repack_weights(
     return final_ordered_tensor


-def get_tensor_shard(param, empty_param, device_mesh, rank, dim, tensor_idx: Optional[int] = None):
+def get_tensor_shard(param, empty_param, device_mesh, rank, dim, tensor_idx: int | None = None):
     """
     Generalized tensor sharding across a multi-dimensional device mesh.
     Extract only the fraction of the parameter owned by the given `rank` when the parameter would have gone sharding at provided `dim`.

src/transformers/optimization.py

Lines changed: 14 additions & 15 deletions

@@ -16,7 +16,6 @@
 import math
 import warnings
 from functools import partial
-from typing import Optional, Union

 import torch
 from torch.optim import Optimizer
@@ -283,7 +282,7 @@ def get_polynomial_decay_schedule_with_warmup(
     return LambdaLR(optimizer, lr_lambda, last_epoch)


-def _get_inverse_sqrt_schedule_lr_lambda(current_step: int, *, num_warmup_steps: int, timescale: Optional[int] = None):
+def _get_inverse_sqrt_schedule_lr_lambda(current_step: int, *, num_warmup_steps: int, timescale: int | None = None):
     if current_step < num_warmup_steps:
         return float(current_step) / float(max(1, num_warmup_steps))
     shift = timescale - num_warmup_steps
@@ -292,7 +291,7 @@ def _get_inverse_sqrt_schedule_lr_lambda(current_step: int, *, num_warmup_steps:


 def get_inverse_sqrt_schedule(
-    optimizer: Optimizer, num_warmup_steps: int, timescale: Optional[int] = None, last_epoch: int = -1
+    optimizer: Optimizer, num_warmup_steps: int, timescale: int | None = None, last_epoch: int = -1
 ):
     """
     Create a schedule with an inverse square-root learning rate, from the initial lr set in the optimizer, after a
@@ -338,8 +337,8 @@ def get_cosine_with_min_lr_schedule_with_warmup(
     num_training_steps: int,
     num_cycles: float = 0.5,
     last_epoch: int = -1,
-    min_lr: Optional[float] = None,
-    min_lr_rate: Optional[float] = None,
+    min_lr: float | None = None,
+    min_lr_rate: float | None = None,
 ):
     """
     Create a schedule with a learning rate that decreases following the values of the cosine function between the
@@ -391,7 +390,7 @@ def _get_cosine_with_min_lr_schedule_with_warmup_lr_rate_lambda(
     num_training_steps: int,
     num_cycles: float,
     min_lr_rate: float = 0.0,
-    warmup_lr_rate: Optional[float] = None,
+    warmup_lr_rate: float | None = None,
 ):
     current_step = float(current_step)
     num_warmup_steps = float(num_warmup_steps)
@@ -415,9 +414,9 @@ def get_cosine_with_min_lr_schedule_with_warmup_lr_rate(
     num_training_steps: int,
     num_cycles: float = 0.5,
     last_epoch: int = -1,
-    min_lr: Optional[float] = None,
-    min_lr_rate: Optional[float] = None,
-    warmup_lr_rate: Optional[float] = None,
+    min_lr: float | None = None,
+    min_lr_rate: float | None = None,
+    warmup_lr_rate: float | None = None,
 ):
     """
     Create a schedule with a learning rate that decreases following the values of the cosine function between the
@@ -507,8 +506,8 @@ def get_wsd_schedule(
     optimizer: Optimizer,
     num_warmup_steps: int,
     num_decay_steps: int,
-    num_training_steps: Optional[int] = None,
-    num_stable_steps: Optional[int] = None,
+    num_training_steps: int | None = None,
+    num_stable_steps: int | None = None,
     warmup_type: str = "linear",
     decay_type: str = "cosine",
     min_lr_ratio: float = 0,
@@ -592,11 +591,11 @@ def get_wsd_schedule(


 def get_scheduler(
-    name: Union[str, SchedulerType],
+    name: str | SchedulerType,
     optimizer: Optimizer,
-    num_warmup_steps: Optional[int] = None,
-    num_training_steps: Optional[int] = None,
-    scheduler_specific_kwargs: Optional[dict] = None,
+    num_warmup_steps: int | None = None,
+    num_training_steps: int | None = None,
+    scheduler_specific_kwargs: dict | None = None,
 ):
     """
     Unified API to get any scheduler from its name.
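
Runtime behavior of get_scheduler is unchanged; only the annotations moved to `|`. A quick usage sketch, assuming the standard top-level export from transformers:

    import torch
    from transformers import get_scheduler

    # Throwaway model/optimizer pair, just to exercise the signature.
    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

    # `name` accepts str | SchedulerType; the formerly Optional keyword
    # arguments still default to None.
    scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=100,
        num_training_steps=1000,
    )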

src/transformers/pipelines/base.py

Lines changed: 31 additions & 31 deletions

@@ -177,8 +177,8 @@ def inner(items):
 def load_model(
     model,
     config: AutoConfig,
-    model_classes: Optional[tuple[type, ...]] = None,
-    task: Optional[str] = None,
+    model_classes: tuple[type, ...] | None = None,
+    task: str | None = None,
     **model_kwargs,
 ):
     """
@@ -270,7 +270,7 @@ def load_model(
     return model


-def get_default_model_and_revision(targeted_task: dict, task_options: Optional[Any]) -> tuple[str, str]:
+def get_default_model_and_revision(targeted_task: dict, task_options: Any | None) -> tuple[str, str]:
     """
     Select a default model to use for a given task.

@@ -305,9 +305,9 @@ def get_default_model_and_revision(targeted_task: dict, task_options: Optional[A

 def load_assistant_model(
     model: "PreTrainedModel",
-    assistant_model: Optional[Union[str, "PreTrainedModel"]],
-    assistant_tokenizer: Optional[PreTrainedTokenizer],
-) -> tuple[Optional["PreTrainedModel"], Optional[PreTrainedTokenizer]]:
+    assistant_model: Union[str, "PreTrainedModel"] | None,
+    assistant_tokenizer: PreTrainedTokenizer | None,
+) -> tuple[Optional["PreTrainedModel"], PreTrainedTokenizer | None]:
     """
     Prepares the assistant model and the assistant tokenizer for a pipeline whose model that can call `generate`.

@@ -404,9 +404,9 @@ class PipelineDataFormat:

     def __init__(
         self,
-        output_path: Optional[str],
-        input_path: Optional[str],
-        column: Optional[str],
+        output_path: str | None,
+        input_path: str | None,
+        column: str | None,
         overwrite: bool = False,
     ):
         self.output_path = output_path
@@ -430,7 +430,7 @@ def __iter__(self):
         raise NotImplementedError()

     @abstractmethod
-    def save(self, data: Union[dict, list[dict]]):
+    def save(self, data: dict | list[dict]):
         """
         Save the provided data object with the representation for the current [`~pipelines.PipelineDataFormat`].

@@ -439,7 +439,7 @@ def save(self, data: Union[dict, list[dict]]):
         """
         raise NotImplementedError()

-    def save_binary(self, data: Union[dict, list[dict]]) -> str:
+    def save_binary(self, data: dict | list[dict]) -> str:
         """
         Save the provided data object as a pickle-formatted binary data on the disk.

@@ -460,9 +460,9 @@ def save_binary(self, data: Union[dict, list[dict]]) -> str:
     @staticmethod
     def from_str(
         format: str,
-        output_path: Optional[str],
-        input_path: Optional[str],
-        column: Optional[str],
+        output_path: str | None,
+        input_path: str | None,
+        column: str | None,
         overwrite=False,
     ) -> "PipelineDataFormat":
         """
@@ -507,9 +507,9 @@ class CsvPipelineDataFormat(PipelineDataFormat):

     def __init__(
         self,
-        output_path: Optional[str],
-        input_path: Optional[str],
-        column: Optional[str],
+        output_path: str | None,
+        input_path: str | None,
+        column: str | None,
         overwrite=False,
     ):
         super().__init__(output_path, input_path, column, overwrite=overwrite)
@@ -551,9 +551,9 @@ class JsonPipelineDataFormat(PipelineDataFormat):

     def __init__(
         self,
-        output_path: Optional[str],
-        input_path: Optional[str],
-        column: Optional[str],
+        output_path: str | None,
+        input_path: str | None,
+        column: str | None,
         overwrite=False,
     ):
         super().__init__(output_path, input_path, column, overwrite=overwrite)
@@ -617,7 +617,7 @@ def save(self, data: dict):
         """
         print(data)

-    def save_binary(self, data: Union[dict, list[dict]]) -> str:
+    def save_binary(self, data: dict | list[dict]) -> str:
         if self.output_path is None:
             raise KeyError(
                 "When using piped input on pipeline outputting large object requires an output file path. "
@@ -776,13 +776,13 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
     def __init__(
         self,
         model: "PreTrainedModel",
-        tokenizer: Optional[PreTrainedTokenizer] = None,
+        tokenizer: PreTrainedTokenizer | None = None,
         feature_extractor: Optional[PreTrainedFeatureExtractor] = None,
-        image_processor: Optional[BaseImageProcessor] = None,
-        processor: Optional[ProcessorMixin] = None,
-        modelcard: Optional[ModelCard] = None,
+        image_processor: BaseImageProcessor | None = None,
+        processor: ProcessorMixin | None = None,
+        modelcard: ModelCard | None = None,
         task: str = "",
-        device: Optional[Union[int, "torch.device"]] = None,
+        device: Union[int, "torch.device"] | None = None,
         binary_output: bool = False,
         **kwargs,
     ):
@@ -939,7 +939,7 @@ def __init__(

     def save_pretrained(
         self,
-        save_directory: Union[str, os.PathLike],
+        save_directory: str | os.PathLike,
         safe_serialization: bool = True,
         **kwargs: Any,
     ):
@@ -1085,7 +1085,7 @@ def _ensure_tensor_on_device(self, inputs, device):
         else:
             return inputs

-    def check_model_type(self, supported_models: Union[list[str], dict]):
+    def check_model_type(self, supported_models: list[str] | dict):
         """
         Check if the model class is in supported by the pipeline.

@@ -1348,9 +1348,9 @@ def register_pipeline(
         self,
         task: str,
         pipeline_class: type,
-        pt_model: Optional[Union[type, tuple[type]]] = None,
-        default: Optional[dict] = None,
-        type: Optional[str] = None,
+        pt_model: type | tuple[type] | None = None,
+        default: dict | None = None,
+        type: str | None = None,
     ) -> None:
         if task in self.supported_tasks:
             logger.warning(f"{task} is already registered. Overwriting pipeline for task {task}...")
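
One subtlety visible in load_assistant_model above: assistant_model became Union[str, "PreTrainedModel"] | None rather than a pure `|` chain, and Optional["PreTrainedModel"] survives in the return type. The reason, assuming PreTrainedModel is only imported for type checking in this module: a quoted forward reference is a plain str at runtime, and str instances do not support the `|` operator, whereas typing.Union wraps strings into ForwardRefs and the resulting Union object does support `| None`. A sketch:

    from typing import TYPE_CHECKING, Union

    if TYPE_CHECKING:
        from transformers import PreTrainedModel

    # Invalid at runtime: "PreTrainedModel" is a str instance here, so
    # `str | "PreTrainedModel"` raises TypeError when the def is executed.
    #     def load(assistant_model: str | "PreTrainedModel" | None = None): ...

    # Valid: Union turns the string into a ForwardRef, and the Union object
    # itself supports `| None`.
    def load(assistant_model: Union[str, "PreTrainedModel"] | None = None) -> None:
        ...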
