huggingface · qgallouedec · Nov 4, 2025 · Sep 30, 2025 · Sep 30, 2025 · Oct 5, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -36,7 +36,7 @@ jobs:
     name: Tests
     strategy:
       matrix:
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.10', '3.11', '3.12', '3.13']
       fail-fast: false
     runs-on:
       group: aws-g4dn-2xlarge

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.10
+    rev: v0.13.3
     hooks:
       - id: ruff-check
         types_or: [ python, pyi ]

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -315,24 +315,6 @@ def replicate_str(string: str, n: int, sep: str = " ") -> str:
 * **Definite Articles:** Removed definite articles where possible to streamline language. (Eg: Changed "The string to replicate" to "String to replicate")
 * **Type Annotations:**
   * Always include type definitions, indicating if a parameter is optional and specifying the default value.
-  * Note that `Optional` means that the value can be `None`, and `*optional*` means that it is not required for the user to pass a value.
-    E.g., for arguments that can't be `None` and aren't required:
-
-    ```python
-    foo (`int`, *optional*, defaults to `4`):
-    ```
-
-    For arguments that can be `None` and are required:
-
-    ```python
-    foo (`Optional[int]`):
-    ```
-
-    for arguments that can be `None` and aren't required:
-
-    ```python
-    foo (`Optional[int]`, *optional*):
-    ```
 
 * **String Defaults:**
   * Ensured that default string values are wrapped in double quotes:

diff --git a/docs/source/lora_without_regret.md b/docs/source/lora_without_regret.md
@@ -143,7 +143,7 @@ For reinforcement learning, the blog uses a math reasoning task that we can repr
 ```python
 def strip_reasoning_accuracy_reward(
     completions: list[list[dict[str, str]]], solution: list[str], **kwargs
-) -> list[Optional[float]]:
+) -> list[float | None]:
     """Reward function that strips reasoning tags and checks mathematical accuracy.
 
     This function:

diff --git a/examples/datasets/hh-rlhf-helpful-base.py b/examples/datasets/hh-rlhf-helpful-base.py
@@ -14,7 +14,6 @@
 
 import re
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,15 +41,15 @@ class ScriptArguments:
     repo_id: str = field(
         default="trl-lib/hh-rlhf-helpful-base", metadata={"help": "Hugging Face repository ID to push the dataset to."}
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None, metadata={"help": "Number of workers to use for dataset processing."}
     )
 
 
 def common_start(str1: str, str2: str) -> str:
     # Zip the two strings and iterate over them together
     common_chars = []
-    for c1, c2 in zip(str1, str2):
+    for c1, c2 in zip(str1, str2, strict=False):  # inputs may differ in length; stop at the shorter one
         if c1 == c2:
             common_chars.append(c1)
         else:

diff --git a/examples/datasets/llava_instruct_mix.py b/examples/datasets/llava_instruct_mix.py
@@ -14,7 +14,6 @@
 
 import ast
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -43,7 +42,7 @@ class ScriptArguments:
         default="trl-lib/llava-instruct-mix",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/datasets/lm-human-preferences-descriptiveness.py b/examples/datasets/lm-human-preferences-descriptiveness.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/lm-human-preferences-descriptiveness",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/datasets/lm-human-preferences-sentiment.py b/examples/datasets/lm-human-preferences-sentiment.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/lm-human-preferences-sentiment",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/datasets/math_shepherd.py b/examples/datasets/math_shepherd.py
@@ -15,7 +15,6 @@
 import re
 from dataclasses import dataclass, field
 from itertools import chain
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -44,7 +43,7 @@ class ScriptArguments:
         default="trl-lib/math_shepherd",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )
@@ -64,7 +63,7 @@ def process_example(example):
     labels = [example["label"][idx] == "+" for idx in indexes]
 
     # Split the inputs into steps (caution, the first step is missing here, it is the prompt)
-    steps = [inputs[i:j] for i, j in zip(chain([0], indexes), chain(indexes, [None]))]
+    steps = [inputs[i:j] for i, j in zip(chain([0], indexes), chain(indexes, [None]), strict=True)]
 
     # Remove the last step (single ⶻ)
     steps = steps[:-1]

diff --git a/examples/datasets/prm800k.py b/examples/datasets/prm800k.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/prm800k",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/datasets/rlaif-v.py b/examples/datasets/rlaif-v.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import features, load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/rlaif-v",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/datasets/tldr.py b/examples/datasets/tldr.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/tldr",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/datasets/tldr_preference.py b/examples/datasets/tldr_preference.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/tldr-preference",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/datasets/ultrafeedback-prompt.py b/examples/datasets/ultrafeedback-prompt.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/ultrafeedback-prompt",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/datasets/ultrafeedback.py b/examples/datasets/ultrafeedback.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -79,7 +78,7 @@ class ScriptArguments:
         default="trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

diff --git a/examples/research_projects/stack_llama/scripts/merge_peft_adapter.py b/examples/research_projects/stack_llama/scripts/merge_peft_adapter.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 import torch
 from peft import PeftConfig, PeftModel
@@ -27,9 +26,9 @@ class ScriptArguments:
     merged model.
     """
 
-    adapter_model_name: Optional[str] = field(default=None, metadata={"help": "the adapter name"})
-    base_model_name: Optional[str] = field(default=None, metadata={"help": "the base model name"})
-    output_name: Optional[str] = field(default=None, metadata={"help": "the merged model name"})
+    adapter_model_name: str | None = field(default=None, metadata={"help": "the adapter name"})
+    base_model_name: str | None = field(default=None, metadata={"help": "the base model name"})
+    output_name: str | None = field(default=None, metadata={"help": "the merged model name"})
 
 
 parser = HfArgumentParser(ScriptArguments)

diff --git a/examples/research_projects/stack_llama/scripts/reward_modeling.py b/examples/research_projects/stack_llama/scripts/reward_modeling.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Any, Optional, Union
+from typing import Any
 
 import evaluate
 import numpy as np
@@ -41,70 +41,70 @@ class ScriptArguments:
     These arguments vary depending on how many GPUs you have, what their capacity and features are, and what size model you want to train.
     """
 
-    local_rank: Optional[int] = field(default=-1, metadata={"help": "Used for multi-gpu"})
-    resume_from_checkpoint: Optional[bool] = field(
+    local_rank: int | None = field(default=-1, metadata={"help": "Used for multi-gpu"})
+    resume_from_checkpoint: bool | None = field(
         default=False,
         metadata={"help": "If you want to resume training where it left off."},
     )
-    deepspeed: Optional[str] = field(
+    deepspeed: str | None = field(
         default=None,
         metadata={
             "help": "Path to deepspeed config if using deepspeed. You may need this if the model that you want to train doesn't fit on a single GPU."
         },
     )
-    per_device_train_batch_size: Optional[int] = field(default=4)
-    per_device_eval_batch_size: Optional[int] = field(default=1)
-    gradient_accumulation_steps: Optional[int] = field(default=1)
-    learning_rate: Optional[float] = field(default=2e-5)
-    weight_decay: Optional[float] = field(default=0.001)
-    model_name: Optional[str] = field(
+    per_device_train_batch_size: int | None = field(default=4)
+    per_device_eval_batch_size: int | None = field(default=1)
+    gradient_accumulation_steps: int | None = field(default=1)
+    learning_rate: float | None = field(default=2e-5)
+    weight_decay: float | None = field(default=0.001)
+    model_name: str | None = field(
         default="gpt2",
         metadata={
             "help": "The model that you want to train from the Hugging Face hub. E.g. gpt2, gpt2-xl, bert, etc."
         },
     )
-    tokenizer_name: Optional[str] = field(
+    tokenizer_name: str | None = field(
         default=None,
         metadata={
             "help": "The tokenizer for your model, if left empty will use the default for your model",
         },
     )
-    bf16: Optional[bool] = field(
+    bf16: bool | None = field(
         default=True,
         metadata={
             "help": "This essentially cuts the training time in half if you want to sacrifice a little precision and have a supported GPU."
         },
     )
-    num_train_epochs: Optional[int] = field(
+    num_train_epochs: int | None = field(
         default=1,
         metadata={"help": "The number of training epochs for the reward model."},
     )
-    train_subset: Optional[int] = field(
+    train_subset: int | None = field(
         default=100000,
         metadata={"help": "The size of the subset of the training data to use"},
     )
-    eval_subset: Optional[int] = field(
+    eval_subset: int | None = field(
         default=50000,
         metadata={"help": "The size of the subset of the eval data to use"},
     )
-    gradient_checkpointing: Optional[bool] = field(
+    gradient_checkpointing: bool | None = field(
         default=False,
         metadata={"help": "Enables gradient checkpointing."},
     )
-    optim: Optional[str] = field(
+    optim: str | None = field(
         default="adamw_hf",
         metadata={"help": "The optimizer to use."},
     )
-    lr_scheduler_type: Optional[str] = field(
+    lr_scheduler_type: str | None = field(
         default="linear",
         metadata={"help": "The lr scheduler"},
     )
-    max_length: Optional[int] = field(default=512)
-    eval_first_step: Optional[bool] = field(
+    max_length: int | None = field(default=512)
+    eval_first_step: bool | None = field(
         default=False,
         metadata={"help": "Whether to run eval after the first step"},
     )
-    seed: Optional[int] = field(
+    seed: int | None = field(
         default=0, metadata={"help": "Random seed that will be set at the beginning of training."}
     )
 
@@ -189,7 +189,9 @@ def preprocess_function(examples):
         "input_ids_k": [],
         "attention_mask_k": [],
     }
-    for question, response_j, response_k in zip(examples["question"], examples["response_j"], examples["response_k"]):
+    for question, response_j, response_k in zip(
+        examples["question"], examples["response_j"], examples["response_k"], strict=True
+    ):
         tokenized_j = tokenizer("Question: " + question + "\n\nAnswer: " + response_j, truncation=True)
         tokenized_k = tokenizer("Question: " + question + "\n\nAnswer: " + response_k, truncation=True)
 
@@ -229,8 +231,8 @@ def preprocess_function(examples):
 @dataclass
 class RewardDataCollatorWithPadding:
     tokenizer: PreTrainedTokenizerBase
-    padding: Union[bool, str, PaddingStrategy] = True
-    pad_to_multiple_of: Optional[int] = None
+    padding: bool | str | PaddingStrategy = True
+    pad_to_multiple_of: int | None = None
     return_tensors: str = "pt"
 
     def __call__(self, features: list[dict[str, Any]]) -> dict[str, Any]: