Status: Closed
Labels: bug, fixed - pending confirmation (Fixed, waiting for confirmation from poster)
Description
- Did you update? (`pip install --upgrade unsloth unsloth_zoo`) Yes
- Colab, Kaggle, or local / cloud? Cloud
- Number of GPUs used (`nvidia-smi`): 1
- Which notebook? Please link! Based on the Llama 3.1 GRPO notebook
- Which Unsloth version, TRL version, transformers version, PyTorch version?
  - unsloth: 2025.7.1
  - unsloth_zoo: 2025.7.1
  - trl: 0.19.1
  - triton: 3.3.0
  - transformers: 4.53.1
  - torch: 2.7.0
- Which trainer? (`SFTTrainer`, `GRPOTrainer`, etc.) GRPOTrainer
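For anyone reproducing this, the versions above can be collected with a small snippet (a minimal sketch using `importlib.metadata`; the package names are the PyPI distributions from the `pip freeze` below):

```python
# Minimal sketch: report versions of the packages relevant to this issue.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("unsloth", "unsloth_zoo", "trl", "triton", "transformers", "torch"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```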
I'm hitting the error in the title (`AttributeError: 'NoneType' object has no attribute 'absmax'`) when trying to fine-tune Llama 3.1 with GRPO, based on one of the notebooks.

Trace:
[rank0]: Traceback (most recent call last):
[rank0]: File "/workspace/data-science/ble.py", line 100, in <module>
[rank0]: run()
[rank0]: File "/workspace/data-science/ble.py", line 87, in run
[rank0]: trainer.train(resume_from_checkpoint=False)
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/transformers/trainer.py", line 2206, in train
[rank0]: return inner_training_loop(
[rank0]: ^^^^^^^^^^^^^^^^^^^^
[rank0]: File "<string>", line 320, in _fast_inner_training_loop
[rank0]: File "<string>", line 34, in _unsloth_training_step
[rank0]: File "/workspace/data-science/unsloth_compiled_cache/UnslothGRPOTrainer.py", line 2040, in compute_loss
[rank0]: loss, completion_length, mean_kl = grpo_accumulated_loss(
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/unsloth_compiled_cache/UnslothGRPOTrainer.py", line 313, in grpo_accumulated_loss
[rank0]: ref_hidden_states = trainer.model(
[rank0]: ^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/models/llama.py", line 1292, in PeftModel_fast_forward
[rank0]: return self.base_model(
[rank0]: ^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/peft/tuners/tuners_utils.py", line 216, in forward
[rank0]: return self.model.forward(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/models/llama.py", line 1115, in _CausalLM_fast_forward
[rank0]: outputs = self.model(
[rank0]: ^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/models/llama.py", line 931, in LlamaModel_fast_forward
[rank0]: layer_outputs = decoder_layer(
[rank0]: ^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/transformers/modeling_layers.py", line 82, in __call__
[rank0]: return self._gradient_checkpointing_func(partial(super().__call__, **kwargs), *args)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/_compile.py", line 51, in inner
[rank0]: return disable_fn(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 838, in _fn
[rank0]: return fn(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/utils/checkpoint.py", line 488, in checkpoint
[rank0]: return CheckpointFunction.apply(function, preserve, *args)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/autograd/function.py", line 575, in apply
[rank0]: return super().apply(*args, **kwargs) # type: ignore[misc]
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth_zoo/gradient_checkpointing.py", line 475, in forward
[rank0]: outputs = run_function(*args)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/models/llama.py", line 604, in LlamaDecoderLayer_fast_forward
[rank0]: hidden_states, self_attn_weights, present_key_value = self.self_attn(
[rank0]: ^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/models/llama.py", line 458, in LlamaAttention_fast_forward
[rank0]: Q, K, V = self.apply_qkv(self, hidden_states)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/kernels/fast_lora.py", line 366, in apply_lora_qkv
[rank0]: Q, K, V = LoRA_QKV.apply(X,
[rank0]: ^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/autograd/function.py", line 575, in apply
[rank0]: return super().apply(*args, **kwargs) # type: ignore[misc]
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/amp/autocast_mode.py", line 510, in decorate_fwd
[rank0]: return fwd(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/kernels/fast_lora.py", line 259, in forward
[rank0]: Q = matmul_lora(X, QW, QW_quant, QA, QB, QS)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/kernels/utils.py", line 693, in matmul_lora
[rank0]: W = fast_dequantize(W.t(), W_quant, use_global_buffer = True)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
[rank0]: return func(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/workspace/data-science/env/lib/python3.12/site-packages/unsloth/kernels/utils.py", line 321, in fast_dequantize
[rank0]: absmax2 = state2.absmax
[rank0]: ^^^^^^^^^^^^^
[rank0]: AttributeError: 'NoneType' object has no attribute 'absmax'
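Reading the last frame: `fast_dequantize` dereferences `state2.absmax`, i.e. it assumes the bnb 4-bit quant state is nested (double-quantized, `compress_statistics=True`). One way to check what the loaded weights actually carry (a diagnostic sketch, assuming bitsandbytes' `QuantState` attribute names; `describe_quant_state` is a hypothetical helper, not part of Unsloth):

```python
# Diagnostic sketch: inspect a bitsandbytes 4-bit quant state.
# If state2 is None the weight was quantized without double quantization,
# which matches the crash on `state2.absmax` in fast_dequantize.
def describe_quant_state(quant_state):
    print("quant_type:", quant_state.quant_type)   # e.g. "nf4"
    print("nested:", quant_state.nested)           # True when double-quantized
    if quant_state.state2 is None:
        print("state2 is None -> fast_dequantize would fail here")
    else:
        print("state2.absmax shape:", tuple(quant_state.state2.absmax.shape))
```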
Full pip freeze output:
accelerate==1.8.1
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
aiosignal==1.4.0
airportsdata==20250706
annotated-types==0.7.0
anyio==4.9.0
astor==0.8.1
attrs==25.3.0
bitsandbytes==0.46.1
blake3==1.0.5
cachetools==6.1.0
certifi==2025.7.9
charset-normalizer==3.4.2
click==8.2.1
cloudpickle==3.1.1
compressed-tensors==0.10.2
cupy-cuda12x==13.4.1
cut-cross-entropy==25.1.1
datasets==4.0.0
depyf==0.18.0
diffusers==0.34.0
dill==0.3.8
diskcache==5.6.3
distro==1.9.0
dnspython==2.7.0
docstring_parser==0.16
einops==0.8.1
email_validator==2.2.0
fastapi==0.116.0
fastapi-cli==0.0.8
fastapi-cloud-cli==0.1.2
fastrlock==0.8.3
filelock==3.18.0
frozenlist==1.7.0
fsspec==2025.3.0
gguf==0.17.1
h11==0.16.0
hf-xet==1.1.5
hf_transfer==0.1.9
httpcore==1.0.9
httptools==0.6.4
httpx==0.28.1
huggingface-hub==0.33.2
idna==3.10
importlib_metadata==8.7.0
interegular==0.3.3
Jinja2==3.1.6
jiter==0.10.0
jsonschema==4.24.0
jsonschema-specifications==2025.4.1
lark==1.2.2
llguidance==0.7.30
llvmlite==0.44.0
lm-format-enforcer==0.10.11
markdown-it-py==3.0.0
MarkupSafe==3.0.2
mdurl==0.1.2
mistral_common==1.7.0
mpmath==1.3.0
msgpack==1.1.1
msgspec==0.19.0
multidict==6.6.3
multiprocess==0.70.16
nest-asyncio==1.6.0
networkx==3.5
ninja==1.11.1.4
numba==0.61.2
numpy==2.2.6
nvidia-cublas-cu12==12.6.4.1
nvidia-cuda-cupti-cu12==12.6.80
nvidia-cuda-nvrtc-cu12==12.6.77
nvidia-cuda-runtime-cu12==12.6.77
nvidia-cudnn-cu12==9.5.1.17
nvidia-cufft-cu12==11.3.0.4
nvidia-cufile-cu12==1.11.1.6
nvidia-curand-cu12==10.3.7.77
nvidia-cusolver-cu12==11.7.1.2
nvidia-cusparse-cu12==12.5.4.2
nvidia-cusparselt-cu12==0.6.3
nvidia-nccl-cu12==2.26.2
nvidia-nvjitlink-cu12==12.6.85
nvidia-nvtx-cu12==12.6.77
openai==1.90.0
opencv-python-headless==4.12.0.88
outlines==0.1.11
outlines_core==0.1.26
packaging==25.0
pandas==2.3.1
partial-json-parser==0.2.1.1.post6
peft==0.16.0
pillow==11.3.0
prometheus-fastapi-instrumentator==7.1.0
prometheus_client==0.22.1
propcache==0.3.2
protobuf==3.20.3
psutil==7.0.0
py-cpuinfo==9.0.0
pyarrow==20.0.0
pybase64==1.4.1
pycountry==24.6.1
pydantic==2.11.7
pydantic_core==2.33.2
Pygments==2.19.2
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
python-json-logger==3.3.0
python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.2
pyzmq==27.0.0
ray==2.47.1
referencing==0.36.2
regex==2024.11.6
requests==2.32.4
rich==14.0.0
rich-toolkit==0.14.8
rignore==0.5.1
rpds-py==0.26.0
safetensors==0.5.3
scipy==1.16.0
sentencepiece==0.2.0
sentry-sdk==2.32.0
setuptools==79.0.1
shellingham==1.5.4
shtab==1.7.2
six==1.17.0
sniffio==1.3.1
starlette==0.46.2
sympy==1.14.0
tiktoken==0.9.0
tokenizers==0.21.2
torch==2.7.0
torchaudio==2.7.0
torchvision==0.22.0
tqdm==4.67.1
transformers==4.53.1
triton==3.3.0
trl==0.19.1
typeguard==4.4.4
typer==0.16.0
typing-inspection==0.4.1
typing_extensions==4.14.1
tyro==0.9.26
tzdata==2025.2
unsloth==2025.7.1
unsloth_zoo==2025.7.1
urllib3==2.5.0
uvicorn==0.35.0
uvloop==0.21.0
vllm==0.9.2
watchfiles==1.1.0
websockets==15.0.1
wheel==0.45.1
xformers==0.0.30
xgrammar==0.1.19
xxhash==3.5.0
yarl==1.20.1
zipp==3.23.0
`nvcc --version` output (the error happened on multiple CUDA versions, 12.4 and 12.6, in addition to the 12.8 shown below):
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2025 NVIDIA Corporation
Built on Fri_Feb_21_20:23:50_PST_2025
Cuda compilation tools, release 12.8, V12.8.93
Build cuda_12.8.r12.8/compiler.35583870_0
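Since the failure reproduced across toolkits, it may also be worth confirming which CUDA build PyTorch itself uses (a quick sketch; note that `torch.version.cuda` reports the toolkit PyTorch was compiled against, which can differ from the system `nvcc`):

```python
import torch

print("torch:", torch.__version__)
print("CUDA build:", torch.version.cuda)  # toolkit PyTorch was compiled against
print("available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```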
Reproduction script:

from unsloth import FastLanguageModel, is_bfloat16_supported
from datetime import datetime
import datasets
import random

max_seq_length = 9_000  # Can increase for longer reasoning traces
lora_rank = 64  # Larger rank = smarter, but slower
RANDOM_SEED = 1337
CURRENT_TIME = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


def format_prompt(n):
    return {
        "prompt": [
            {"role": "system", "content": "Write N words on any topic."},
            {"role": "user", "content": f"N: {n}"},
        ]
    }


dataset_formatted = datasets.Dataset.from_list(
    [format_prompt(random.randint(100, 200)) for _ in range(1000)]
)


def run():
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "meta-llama/meta-Llama-3.1-8B-Instruct",
        max_seq_length = max_seq_length,
        load_in_4bit = True,  # False for LoRA 16bit
        fast_inference = True,  # Enable vLLM fast inference
        max_lora_rank = lora_rank,
        gpu_memory_utilization = 0.7,  # Reduce if out of memory
    )
    model = FastLanguageModel.get_peft_model(
        model,
        r = lora_rank,  # Choose any number > 0! Suggested 8, 16, 32, 64, 128
        target_modules = [
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],  # Remove QKVO if out of memory
        lora_alpha = lora_rank,
        use_gradient_checkpointing = "unsloth",  # Enable long context finetuning
        random_state = 3407,
    )

    def foo_reward_func(prompts, completions, **kwargs) -> list[float]:
        return [1.0] * len(completions)

    from trl import GRPOConfig, GRPOTrainer

    training_args = GRPOConfig(
        use_vllm = True,  # use vLLM for fast inference!
        learning_rate = 5e-6,
        adam_beta1 = 0.9,
        adam_beta2 = 0.99,
        weight_decay = 0.1,
        warmup_ratio = 0.1,
        lr_scheduler_type = "cosine",
        optim = "paged_adamw_8bit",
        logging_steps = 1,
        bf16 = is_bfloat16_supported(),
        fp16 = not is_bfloat16_supported(),
        per_device_train_batch_size = 6,
        gradient_accumulation_steps = 6,  # Increase for smoother training
        num_generations = 6,  # Decrease if out of memory
        max_prompt_length = 4024,
        max_completion_length = 4024,
        # num_train_epochs = 1,  # Set to 1 for a full training run
        max_steps = 1000,
        save_steps = 100,
        max_grad_norm = 0.1,
        output_dir = "outputs_foo_model",
        report_to = "none",
        temperature = 0.8,
    )

    try:
        trainer = GRPOTrainer(
            model = model,
            processing_class = tokenizer,
            reward_funcs = [
                foo_reward_func,
            ],
            args = training_args,
            train_dataset = dataset_formatted,
        )
        trainer.train(resume_from_checkpoint=False)
    except KeyboardInterrupt:
        print("Exiting gracefully!")

    MODEL_OUTPUT_DIR = f"foo_model_16b_{CURRENT_TIME}"
    model.save_pretrained_merged(
        MODEL_OUTPUT_DIR,
        tokenizer,
        save_method="merged_16bit",
    )


if __name__ == "__main__":
    run()
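For anyone debugging the same trace with this script, a sketch that scans the loaded model for 4-bit weights whose quant state lacks the nested `state2` (assumes bitsandbytes' `Linear4bit` layout with the state at `weight.quant_state`; `find_non_nested_4bit` is a hypothetical helper):

```python
# Sketch: list 4-bit modules whose quant state has no nested state2.
import bitsandbytes as bnb

def find_non_nested_4bit(model):
    bad = []
    for name, module in model.named_modules():
        if isinstance(module, bnb.nn.Linear4bit):
            qs = getattr(module.weight, "quant_state", None)
            if qs is not None and qs.state2 is None:
                bad.append(name)
    return bad

# e.g. call right after get_peft_model(...):
# print(find_non_nested_4bit(model))
```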