
Commit 208e9f7

📏 torch_dtype to dtype everywhere (#4000)
Co-authored-by: Quentin Gallouédec <[email protected]>
1 parent 3bfa981 commit 208e9f7


55 files changed: +200 -222 lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
@@ -235,7 +235,7 @@ jobs:
           uv pip install ".[dev]"
           uv pip install accelerate==1.4.0
           uv pip install datasets==3.0.0
-          uv pip install transformers==4.55.0
+          uv pip install transformers==4.56.0
 
       - name: Test with pytest
         run: |

docs/source/detoxifying_a_lm.md

Lines changed: 2 additions & 2 deletions
@@ -93,10 +93,10 @@ Our goal is to train models up to 6B parameters, which is about 24GB in float32!
 - Use `bfloat16` precision: Simply load your model in `bfloat16` when calling `from_pretrained` and you can reduce the size of the model by 2:
 
 ```python
-model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", torch_dtype=torch.bfloat16)
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", dtype=torch.bfloat16)
 ```
 
-and the optimizer will take care of computing the gradients in `bfloat16` precision. Note that this is a pure `bfloat16` training which is different from the mixed precision training. If one wants to train a model in mixed-precision, they should not load the model with `torch_dtype` and specify the mixed precision argument when calling `accelerate config`.
+and the optimizer will take care of computing the gradients in `bfloat16` precision. Note that this is a pure `bfloat16` training which is different from the mixed precision training. If one wants to train a model in mixed-precision, they should not load the model with `dtype` and specify the mixed precision argument when calling `accelerate config`.
 
 - Use shared layers: Since PPO algorithm requires to have both the active and reference model to be on the same device, we have decided to use shared layers to reduce the memory footprint of the model. This can be achieved by specifying `num_shared_layers` argument when calling the `create_reference_model()` function. For example, if you want to share the first 6 layers of the model, you can do it like this:
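The doc passage above distinguishes pure `bfloat16` training from mixed precision. A hedged sketch of the two setups (the `Accelerator` call is illustrative of what `accelerate config` would set up):

```python
import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM

# Pure bfloat16: the weights themselves are stored in bf16, halving memory.
model_bf16 = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", dtype=torch.bfloat16)

# Mixed precision instead: keep the default fp32 weights (no dtype argument)
# and let the training loop autocast to bf16.
accelerator = Accelerator(mixed_precision="bf16")
model_fp32 = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
```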

docs/source/dpo_trainer.md

Lines changed: 1 addition & 1 deletion
@@ -255,7 +255,7 @@ model = AutoModelForCausalLM.from_pretrained(
     load_in_4bit=True,
     quantization_config=bnb_config,
     attn_implementation="flash_attention_2",
-    torch_dtype=torch.bfloat16,
+    dtype=torch.bfloat16,
     device_map="auto",
 )
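The hunk references a `bnb_config` defined earlier in that doc; a plausible construction (a sketch, the exact options in the doc may differ) pairs 4-bit NF4 quantization with a bf16 compute dtype:

```python
import torch
from transformers import BitsAndBytesConfig

# Assumed, typical QLoRA-style config. Note that `bnb_4bit_compute_dtype`
# keeps its name: only `from_pretrained`'s `torch_dtype` kwarg became `dtype`.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
```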

docs/source/grpo_trainer.md

Lines changed: 1 addition & 1 deletion
@@ -573,7 +573,7 @@ accelerate launch \
     --output_dir grpo-Qwen2.5-VL-3B-Instruct \
     --learning_rate 1e-5 \
     --gradient_checkpointing \
-    --torch_dtype bfloat16 \
+    --dtype bfloat16 \
     --max_prompt_length 2048 \
     --max_completion_length 1024 \
     --use_vllm \
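The renamed CLI flag is parsed into TRL's `ModelConfig` dataclass; programmatically the same setting would look roughly like this (a sketch, assuming the post-rename field name):

```python
from trl import ModelConfig

# Rough equivalent of passing `--dtype bfloat16` on the command line (illustrative).
model_args = ModelConfig(
    model_name_or_path="Qwen/Qwen2.5-VL-3B-Instruct",
    dtype="bfloat16",
)
```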

docs/source/iterative_sft_trainer.md

Lines changed: 2 additions & 2 deletions
@@ -79,7 +79,7 @@ from trl import IterativeSFTConfig
 
 config = IterativeSFTConfig(
     # Model initialization parameters
-    model_init_kwargs={"torch_dtype": "bfloat16"},
+    model_init_kwargs={"dtype": "bfloat16"},
 
     # Data preprocessing parameters
     max_length=512,
@@ -104,7 +104,7 @@ You can control how the model is initialized by passing keyword arguments to `mo
 ```python
 config = IterativeSFTConfig(
     model_init_kwargs={
-        "torch_dtype": "bfloat16",
+        "dtype": "bfloat16",
         "device_map": "auto",
         "trust_remote_code": True,
     }
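Anything placed in `model_init_kwargs` is forwarded to `from_pretrained` when the trainer builds the model from a model id. A hedged end-to-end sketch (model id illustrative; the real trainer also expects a tokenizer/processing class and data):

```python
from trl import IterativeSFTConfig, IterativeSFTTrainer

config = IterativeSFTConfig(
    model_init_kwargs={"dtype": "bfloat16"},  # forwarded to from_pretrained
    max_length=512,
)
# Passing a string model id lets the trainer instantiate the model itself,
# applying the kwargs above (sketch only).
trainer = IterativeSFTTrainer(model="Qwen/Qwen3-0.6B", args=config)
```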

docs/source/sft_trainer.md

Lines changed: 3 additions & 3 deletions
@@ -130,16 +130,16 @@ While training and evaluating we record the following reward metrics:
 You can directly pass the kwargs of the [`~transformers.AutoModelForCausalLM.from_pretrained()`] method to the [`SFTConfig`]. For example, if you want to load a model in a different precision, analogous to
 
 ```python
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", torch_dtype=torch.bfloat16)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", dtype=torch.bfloat16)
 ```
 
-you can do so by passing the `model_init_kwargs={"torch_dtype": torch.bfloat16}` argument to the [`SFTConfig`].
+you can do so by passing the `model_init_kwargs={"dtype": torch.bfloat16}` argument to the [`SFTConfig`].
 
 ```python
 from trl import SFTConfig
 
 training_args = SFTConfig(
-    model_init_kwargs={"torch_dtype": torch.bfloat16},
+    model_init_kwargs={"dtype": torch.bfloat16},
 )
 ```
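For comparison, the precision choice routed through `SFTConfig` ends up in the trainer's internal `from_pretrained` call. A minimal hedged sketch (dataset choice is illustrative):

```python
import torch
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

training_args = SFTConfig(model_init_kwargs={"dtype": torch.bfloat16})
trainer = SFTTrainer(
    model="Qwen/Qwen3-0.6B",  # a string id: the trainer loads it with the kwargs above
    args=training_args,
    train_dataset=load_dataset("trl-lib/Capybara", split="train"),  # illustrative dataset
)
trainer.train()
```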

examples/research_projects/layer_skip/scripts/benchmark_layer_skip.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ def generate_tokens_with_assistance(model, inputs, assistant_early_exit):
 if __name__ == "__main__":
     ckpt = config.hub_model_id
 
-    model = AutoModelForCausalLM.from_pretrained(ckpt, device_map="auto", torch_dtype=torch.bfloat16)
+    model = AutoModelForCausalLM.from_pretrained(ckpt, device_map="auto", dtype=torch.bfloat16)
     tokenizer = AutoTokenizer.from_pretrained(ckpt)
 
     prompt = "### Instruction: What are my alarms for the rest of the day?\n ### Response: "

examples/research_projects/layer_skip/scripts/layer_skip_sft.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ def formatting_prompts_func(example):
 
 # load the model and tokenizer
 print("[INFO] loading the model and tokenizer...")
-model = AutoModelForCausalLM.from_pretrained(config.model_name, device_map="auto", torch_dtype=torch.bfloat16)
+model = AutoModelForCausalLM.from_pretrained(config.model_name, device_map="auto", dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(config.tokenizer_name, add_eos_token=True)
 
 # adding pad and eos tokens if not provided in the tokenizer

examples/research_projects/stack_llama/scripts/merge_peft_adapter.py

Lines changed: 2 additions & 4 deletions
@@ -42,12 +42,10 @@ class ScriptArguments:
 if peft_config.task_type == "SEQ_CLS":
     # The sequence classification task is used for the reward model in PPO
     model = AutoModelForSequenceClassification.from_pretrained(
-        script_args.base_model_name, num_labels=1, torch_dtype=torch.bfloat16
+        script_args.base_model_name, num_labels=1, dtype=torch.bfloat16
     )
 else:
-    model = AutoModelForCausalLM.from_pretrained(
-        script_args.base_model_name, return_dict=True, torch_dtype=torch.bfloat16
-    )
+    model = AutoModelForCausalLM.from_pretrained(script_args.base_model_name, return_dict=True, dtype=torch.bfloat16)
 
 tokenizer = AutoTokenizer.from_pretrained(script_args.base_model_name)

examples/research_projects/stack_llama/scripts/reward_modeling.py

Lines changed: 1 addition & 3 deletions
@@ -168,9 +168,7 @@ class ScriptArguments:
     lora_dropout=0.1,
 )
 
-model = AutoModelForSequenceClassification.from_pretrained(
-    script_args.model_name, num_labels=1, torch_dtype=torch.bfloat16
-)
+model = AutoModelForSequenceClassification.from_pretrained(script_args.model_name, num_labels=1, dtype=torch.bfloat16)
 model = get_peft_model(model, peft_config)
 model.print_trainable_parameters()
