Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0d54019
Drop Python 3.9, add Python 3.14
qgallouedec Sep 30, 2025
f69c919
style
qgallouedec Sep 30, 2025
7310796
allow prerelease
qgallouedec Oct 5, 2025
b991fd4
target python version 3.10 for ruff
qgallouedec Oct 5, 2025
4e239a6
Merge branch 'main' into py3.14
qgallouedec Oct 5, 2025
d7fe889
apply precommit
qgallouedec Oct 5, 2025
5004c95
apply precommit
qgallouedec Oct 5, 2025
0ccfe5d
strict=True
qgallouedec Oct 5, 2025
dd56aaa
revert unwanted change
qgallouedec Oct 5, 2025
b41bcbd
no py314
qgallouedec Oct 5, 2025
b691f39
fix tools type hint
qgallouedec Oct 5, 2025
b07df79
Squashed commit of the following:
qgallouedec Oct 6, 2025
68959ad
Squashed commit of the following:
qgallouedec Oct 6, 2025
cbb41f7
Merge branch 'main' into py3.14
qgallouedec Oct 6, 2025
a33e642
style
qgallouedec Oct 6, 2025
88eee87
rm prerelease
qgallouedec Oct 6, 2025
c97bb24
rm whitespace
qgallouedec Oct 6, 2025
d66ea24
revert video change
qgallouedec Oct 6, 2025
cfcec4a
some missing
qgallouedec Oct 6, 2025
a5ca7d4
style
qgallouedec Oct 6, 2025
a6263a5
Merge branch 'main' into py3.14
qgallouedec Oct 7, 2025
a108a71
Squashed commit of the following:
qgallouedec Oct 30, 2025
ea729c3
Merge branch 'main' into py3.14
qgallouedec Oct 30, 2025
b209aef
fix gold
qgallouedec Oct 30, 2025
0bb1649
Merge branch 'main' into py3.14
qgallouedec Oct 30, 2025
93673bc
Merge branch 'main' into py3.14
qgallouedec Nov 4, 2025
559288b
style
qgallouedec Nov 4, 2025
0a5277b
Merge branch 'main' into py3.14
qgallouedec Nov 4, 2025
629feef
Remove Python 3.9 deprecation warning from the codebase
qgallouedec Nov 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
name: Tests
strategy:
matrix:
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
python-version: ['3.10', '3.11', '3.12', '3.13']
fail-fast: false
runs-on:
group: aws-g4dn-2xlarge
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.10
rev: v0.13.3
hooks:
- id: ruff-check
types_or: [ python, pyi ]
Expand Down
18 changes: 0 additions & 18 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -315,24 +315,6 @@ def replicate_str(string: str, n: int, sep: str = " ") -> str:
* **Definite Articles:** Removed definite articles where possible to streamline language. (E.g., changed "The string to replicate" to "String to replicate".)
* **Type Annotations:**
* Always include type definitions, indicating if a parameter is optional and specifying the default value.
* Note that `Optional` means that the value can be `None`, and `*optional*` means that it is not required for the user to pass a value.
E.g., for arguments that can't be `None` and aren't required:

```python
foo (`int`, *optional*, defaults to `4`):
```

For arguments that can be `None` and are required:

```python
foo (`Optional[int]`):
```

For arguments that can be `None` and aren't required:

```python
foo (`Optional[int]`, *optional*):
```

* **String Defaults:**
* Ensured that default string values are wrapped in double quotes:
Expand Down
2 changes: 1 addition & 1 deletion docs/source/lora_without_regret.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ For reinforcement learning, the blog uses a math reasoning task that we can repr
```python
def strip_reasoning_accuracy_reward(
completions: list[list[dict[str, str]]], solution: list[str], **kwargs
) -> list[Optional[float]]:
) -> list[float | None]:
"""Reward function that strips reasoning tags and checks mathematical accuracy.

This function:
Expand Down
5 changes: 2 additions & 3 deletions examples/datasets/hh-rlhf-helpful-base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import re
from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -42,15 +41,15 @@ class ScriptArguments:
repo_id: str = field(
default="trl-lib/hh-rlhf-helpful-base", metadata={"help": "Hugging Face repository ID to push the dataset to."}
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None, metadata={"help": "Number of workers to use for dataset processing."}
)


def common_start(str1: str, str2: str) -> str:
# Zip the two strings and iterate over them together
common_chars = []
for c1, c2 in zip(str1, str2):
for c1, c2 in zip(str1, str2, strict=True):
if c1 == c2:
common_chars.append(c1)
else:
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/llava_instruct_mix.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import ast
from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -43,7 +42,7 @@ class ScriptArguments:
default="trl-lib/llava-instruct-mix",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/lm-human-preferences-descriptiveness.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -42,7 +41,7 @@ class ScriptArguments:
default="trl-lib/lm-human-preferences-descriptiveness",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/lm-human-preferences-sentiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -42,7 +41,7 @@ class ScriptArguments:
default="trl-lib/lm-human-preferences-sentiment",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
5 changes: 2 additions & 3 deletions examples/datasets/math_shepherd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import re
from dataclasses import dataclass, field
from itertools import chain
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -44,7 +43,7 @@ class ScriptArguments:
default="trl-lib/math_shepherd",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand All @@ -64,7 +63,7 @@ def process_example(example):
labels = [example["label"][idx] == "+" for idx in indexes]

# Split the inputs into steps (caution, the first step is missing here, it is the prompt)
steps = [inputs[i:j] for i, j in zip(chain([0], indexes), chain(indexes, [None]))]
steps = [inputs[i:j] for i, j in zip(chain([0], indexes), chain(indexes, [None]), strict=True)]

# Remove the last step (single ⶻ)
steps = steps[:-1]
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/prm800k.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -42,7 +41,7 @@ class ScriptArguments:
default="trl-lib/prm800k",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/rlaif-v.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from datasets import features, load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -42,7 +41,7 @@ class ScriptArguments:
default="trl-lib/rlaif-v",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/tldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -42,7 +41,7 @@ class ScriptArguments:
default="trl-lib/tldr",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/tldr_preference.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -42,7 +41,7 @@ class ScriptArguments:
default="trl-lib/tldr-preference",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/ultrafeedback-prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -42,7 +41,7 @@ class ScriptArguments:
default="trl-lib/ultrafeedback-prompt",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
3 changes: 1 addition & 2 deletions examples/datasets/ultrafeedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from datasets import load_dataset
from huggingface_hub import ModelCard
Expand Down Expand Up @@ -79,7 +78,7 @@ class ScriptArguments:
default="trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness",
metadata={"help": "Hugging Face repository ID to push the dataset to."},
)
dataset_num_proc: Optional[int] = field(
dataset_num_proc: int | None = field(
default=None,
metadata={"help": "Number of workers to use for dataset processing."},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

import torch
from peft import PeftConfig, PeftModel
Expand All @@ -27,9 +26,9 @@ class ScriptArguments:
merged model.
"""

adapter_model_name: Optional[str] = field(default=None, metadata={"help": "the adapter name"})
base_model_name: Optional[str] = field(default=None, metadata={"help": "the base model name"})
output_name: Optional[str] = field(default=None, metadata={"help": "the merged model name"})
adapter_model_name: str | None = field(default=None, metadata={"help": "the adapter name"})
base_model_name: str | None = field(default=None, metadata={"help": "the base model name"})
output_name: str | None = field(default=None, metadata={"help": "the merged model name"})


parser = HfArgumentParser(ScriptArguments)
Expand Down
50 changes: 26 additions & 24 deletions examples/research_projects/stack_llama/scripts/reward_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

from dataclasses import dataclass, field
from typing import Any, Optional, Union
from typing import Any

import evaluate
import numpy as np
Expand Down Expand Up @@ -41,70 +41,70 @@ class ScriptArguments:
These arguments vary depending on how many GPUs you have, what their capacity and features are, and what size model you want to train.
"""

local_rank: Optional[int] = field(default=-1, metadata={"help": "Used for multi-gpu"})
resume_from_checkpoint: Optional[bool] = field(
local_rank: int | None = field(default=-1, metadata={"help": "Used for multi-gpu"})
resume_from_checkpoint: bool | None = field(
default=False,
metadata={"help": "If you want to resume training where it left off."},
)
deepspeed: Optional[str] = field(
deepspeed: str | None = field(
default=None,
metadata={
"help": "Path to deepspeed config if using deepspeed. You may need this if the model that you want to train doesn't fit on a single GPU."
},
)
per_device_train_batch_size: Optional[int] = field(default=4)
per_device_eval_batch_size: Optional[int] = field(default=1)
gradient_accumulation_steps: Optional[int] = field(default=1)
learning_rate: Optional[float] = field(default=2e-5)
weight_decay: Optional[float] = field(default=0.001)
model_name: Optional[str] = field(
per_device_train_batch_size: int | None = field(default=4)
per_device_eval_batch_size: int | None = field(default=1)
gradient_accumulation_steps: int | None = field(default=1)
learning_rate: float | None = field(default=2e-5)
weight_decay: float | None = field(default=0.001)
model_name: str | None = field(
default="gpt2",
metadata={
"help": "The model that you want to train from the Hugging Face hub. E.g. gpt2, gpt2-xl, bert, etc."
},
)
tokenizer_name: Optional[str] = field(
tokenizer_name: str | None = field(
default=None,
metadata={
"help": "The tokenizer for your model, if left empty will use the default for your model",
},
)
bf16: Optional[bool] = field(
bf16: bool | None = field(
default=True,
metadata={
"help": "This essentially cuts the training time in half if you want to sacrifice a little precision and have a supported GPU."
},
)
num_train_epochs: Optional[int] = field(
num_train_epochs: int | None = field(
default=1,
metadata={"help": "The number of training epochs for the reward model."},
)
train_subset: Optional[int] = field(
train_subset: int | None = field(
default=100000,
metadata={"help": "The size of the subset of the training data to use"},
)
eval_subset: Optional[int] = field(
eval_subset: int | None = field(
default=50000,
metadata={"help": "The size of the subset of the eval data to use"},
)
gradient_checkpointing: Optional[bool] = field(
gradient_checkpointing: bool | None = field(
default=False,
metadata={"help": "Enables gradient checkpointing."},
)
optim: Optional[str] = field(
optim: str | None = field(
default="adamw_hf",
metadata={"help": "The optimizer to use."},
)
lr_scheduler_type: Optional[str] = field(
lr_scheduler_type: str | None = field(
default="linear",
metadata={"help": "The lr scheduler"},
)
max_length: Optional[int] = field(default=512)
eval_first_step: Optional[bool] = field(
max_length: int | None = field(default=512)
eval_first_step: bool | None = field(
default=False,
metadata={"help": "Whether to run eval after the first step"},
)
seed: Optional[int] = field(
seed: int | None = field(
default=0, metadata={"help": "Random seed that will be set at the beginning of training."}
)

Expand Down Expand Up @@ -189,7 +189,9 @@ def preprocess_function(examples):
"input_ids_k": [],
"attention_mask_k": [],
}
for question, response_j, response_k in zip(examples["question"], examples["response_j"], examples["response_k"]):
for question, response_j, response_k in zip(
examples["question"], examples["response_j"], examples["response_k"], strict=True
):
tokenized_j = tokenizer("Question: " + question + "\n\nAnswer: " + response_j, truncation=True)
tokenized_k = tokenizer("Question: " + question + "\n\nAnswer: " + response_k, truncation=True)

Expand Down Expand Up @@ -229,8 +231,8 @@ def preprocess_function(examples):
@dataclass
class RewardDataCollatorWithPadding:
tokenizer: PreTrainedTokenizerBase
padding: Union[bool, str, PaddingStrategy] = True
pad_to_multiple_of: Optional[int] = None
padding: bool | str | PaddingStrategy = True
pad_to_multiple_of: int | None = None
return_tensors: str = "pt"

def __call__(self, features: list[dict[str, Any]]) -> dict[str, Any]:
Expand Down
Loading
Loading