
Commit a4a7b01

Authored by danielhanchen, Datta0, shimmyshimmer, jeromeku, and mmathew23
Nightly (#3392)
Squashed rollup of nightly work. The original squash log listed several hundred per-file "Update *.py" commits and repeated the logs of merged sub-PRs verbatim; the distinct changes are deduplicated below.

* Torch 2.8 support (#3186); TF protobuf incompatibility patch; versioning bumps
* GPT OSS fixes; MXFP4 dequantization and a new load_in_16bit loading path
* Upcast norms and layernorms; CE loss fix; UNSLOTH_ENABLE_CCE flag
* torch_dtype / custom_datatype / Float16 handling across loader.py and vision.py
* New helpers extract_model_type_from_config and get_transformers_model_type; save model types and max_seq_length
* Fix DataParallel, auto_mapping, and the aimv2 issue; general import fixes (import_fixes.py)
* Synthetic data generation updates (synthetic.py); Xet support; chat template fixes
* Mistral3 vLLM (#3349): vLLM backend for vision-language models, LoRA save/load under vLLM, LoRA/vLLM compatibility checks, improved fast-inference patching, Mistral 3 support
* Model registry rework (#2908): DeepSeek V3 / R1 / R1-Zero / distill models, Mistral Small, Phi; global registration, search APIs, tests, and a registry README
* Llama 4, Gemma 3N, Sesame, and Whisper support; automatic vision detection; is_multimodal; UNSLOTH_DISABLE_STATIC_GENERATION
* GRPO work: generic efficient GRPO, logits/temperature handling, rl_replacements.py updates
* Fix {"reasoning_effort": "high"} not being applied through maybe_apply_chat_template
* Multi-GPU fixes (#2919): move inputs and RoPE caches to the correct devices
* Causal-mask fixes (#2868, #3011); explicitly check that xformers exists for attention (#2889)
* Falcon H1: silently skip import when transformers < 4.53.0 (#2912); fall back to feed_forward when mlp is absent (#2913); force float32 on SM<75 machines (#3026)
* Dynamically adjust and patch get_per_token_logps (#2911)
* Intel GPU support with vLLM (#2903); Intel path in llama.py (#3012)
* Fix Gemma 2 (#3024); fix torch compile issues (#3028); compiler stance / set_stance fixes; skip_guard_eval_unsafe fix; stride check
* Reinstate the Qwen2.5-VL-32B-Instruct mapping (#2990, reverting 204fc46); fix quantization_method
* Filter vLLM standby logs (#3131); add gradient scaler; mamba fix; disable mllama; better attn_implementation handling
* Allow float32 dtype in FastLanguageModel (#3204); improve sampling-params handling and error messages
* Gradient checkpointing and offload_embedding; set embedding padding row to 0 (now guarded); fix padding issue; fixup patch (#3359)
* Bug-fix rollups (#3017, #3195)
* Dependency pins: unsloth_zoo, transformers, datasets, trl (see the pyproject.toml diff below)

Co-authored-by: Datta Nimmaturi, Michael Han (shimmyshimmer), jeromeku, DoubleMathew (mmathew23), Lei Zhenyuan, parth2510
1 parent 79ae180 commit a4a7b01

File tree

5 files changed: +93 −72 lines

pyproject.toml
unsloth/models/_utils.py
unsloth/models/loader.py
unsloth/models/rl_replacements.py
unsloth/models/vision.py

pyproject.toml
Lines changed: 7 additions & 7 deletions

@@ -37,18 +37,18 @@ triton = [
 ]

 huggingface = [
-    "unsloth_zoo>=2025.9.11",
+    "unsloth_zoo>=2025.9.13",
     "packaging",
     "tyro",
     "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.56.2",
-    "datasets>=3.4.1,<4.0.0",
+    "datasets>=3.4.1,!=4.0.*,!=4.1.0",
     "sentencepiece>=0.2.0",
     "tqdm",
     "psutil",
     "wheel>=0.42.0",
     "numpy",
     "accelerate>=0.34.1",
-    "trl>=0.7.9,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,!=0.15.0,!=0.19.0",
+    "trl>=0.7.9,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,!=0.15.0,!=0.19.0,<=0.23.0",
     "peft>=0.7.1,!=0.11.0",
     "protobuf",
     "huggingface_hub>=0.34.0",
@@ -453,11 +453,11 @@ colab-ampere-torch220 = [
     "flash-attn>=2.6.3",
 ]
 colab-new = [
-    "unsloth_zoo>=2025.9.11",
+    "unsloth_zoo>=2025.9.13",
     "packaging",
     "tyro",
-    "transformers>=4.51.3,!=4.47.0,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.56.2",
-    "datasets>=3.4.1,<4.0.0",
+    "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,<=4.56.2",
+    "datasets>=3.4.1,!=4.0.*,!=4.1.0",
     "sentencepiece>=0.2.0",
     "tqdm",
     "psutil",
@@ -471,7 +471,7 @@ colab-new = [
 ]
 colab-no-deps = [
     "accelerate>=0.34.1",
-    "trl>=0.7.9,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,!=0.15.0,!=0.19.0",
+    "trl>=0.7.9,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,!=0.15.0,!=0.19.0,<=0.23.0",
     "peft>=0.7.1",
     "xformers",
     "bitsandbytes>=0.45.5",

unsloth/models/_utils.py
Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "2025.9.9"
+__version__ = "2025.9.10"

 __all__ = [
     "SUPPORTS_BFLOAT16",

unsloth/models/loader.py
Lines changed: 8 additions & 3 deletions

@@ -662,10 +662,15 @@ def from_pretrained(
     )
     model_types_all = ",".join(model_types) + ","

-    # Check versions
+    # Save model types and loading method
     lowered_model_name = model_name.lower()
-    if os.environ.get("UNSLOTH_MODEL_NAME", "") == "":
-        os.environ["UNSLOTH_MODEL_NAME"] = lowered_model_name
+    string = os.environ.get("UNSLOTH_MODEL_NAME", "") + model_types_all
+    if load_in_4bit: string += "_load_in_4bit_"
+    if load_in_8bit: string += "_load_in_8bit_"
+    if load_in_16bit: string += "_load_in_16bit_"
+    os.environ["UNSLOTH_MODEL_NAME"] = string
+
+    # Check versions
     LATEST = '\nPlease use transformers via `pip install --no-deps git+https://github.com/huggingface/transformers.git`'
     NIGHTLY = '\nPlease use nightly transformers via pip install --upgrade "transformers>=4.49.0"`'
     # Pixtral
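
A minimal sketch of the bookkeeping this hunk introduces: UNSLOTH_MODEL_NAME now accumulates the detected model types plus markers for the requested load precision, instead of only recording the first model name. Names mirror the diff; model_types and the flags stand in for the real from_pretrained locals:

    import os

    def record_model_load(model_types, load_in_4bit = False, load_in_8bit = False, load_in_16bit = False):
        model_types_all = ",".join(model_types) + ","
        # Append to any previously recorded value instead of overwriting it
        string = os.environ.get("UNSLOTH_MODEL_NAME", "") + model_types_all
        if load_in_4bit: string += "_load_in_4bit_"
        if load_in_8bit: string += "_load_in_8bit_"
        if load_in_16bit: string += "_load_in_16bit_"
        os.environ["UNSLOTH_MODEL_NAME"] = string

    record_model_load(["llama"], load_in_4bit = True)
    print(os.environ["UNSLOTH_MODEL_NAME"])  # llama,_load_in_4bit_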

unsloth/models/rl_replacements.py
Lines changed: 49 additions & 1 deletion

@@ -27,6 +27,7 @@
 from collections import defaultdict
 from unsloth_zoo.rl_replacements import RL_REPLACEMENTS, left_pack_padding
 from unsloth import DEVICE_TYPE
+import textwrap

 RL_EXTRA_ARGS = defaultdict(list)
 RL_FUNCTIONS = defaultdict(list)
@@ -295,12 +296,59 @@ def grpo_trainer__generate_and_score_completions(function_name, function):
         if self.use_vllm:"""
     function = function.replace(replace_part, new_replacement)

-
     return function
 pass
 RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer__generate_and_score_completions)


+# Fix {"reasoning_effort" : "high"} not applied
+def grpo_trainer_fix_maybe_apply_chat_template(function_name, function):
+    spaces = function.find("def ")
+    if spaces % 4 != 0: return function
+    spaces += 4
+    replacement = """
+    _chat_template_ = getattr(self.processing_class, "chat_template", None)
+    if _chat_template_ is None: _chat_template_ = ""
+    _supported_keys_ = set(("prompt", "chosen", "rejected", "completion", "messages", "label"))
+
+    prompts_text = []
+    for _example_ in __INPUTS__REPLACEMENT__:
+        _tokenizer_kwargs_ = {}
+        if type(_example_) is not dict:
+            _example_ = {"prompt": _example_}
+        _left_keys_ = _example_.keys() - _supported_keys_
+        for k in _left_keys_:
+            if k in _chat_template_:
+                v = _example_[k]
+                if type(v) is str:
+                    _tokenizer_kwargs_[k] = v
+        _x_ = maybe_apply_chat_template(_example_, self.processing_class, **_tokenizer_kwargs_)["prompt"]
+        prompts_text.append(_x_)
+    """
+    replacement = textwrap.dedent(replacement).strip()
+    replacement = textwrap.indent(replacement, spaces*" ")
+    replacement = f"\n{replacement}\n"
+    what = 'prompts_text = [maybe_apply_chat_template(example, self.processing_class)["prompt"] for example in inputs]'
+    function = function.replace(what, replacement.replace("__INPUTS__REPLACEMENT__", "inputs"))
+
+    """prompts_text = [
+        maybe_apply_chat_template({"prompt": prompt}, self.processing_class)["prompt"] for prompt in prompts
+    ]"""
+    function = re.sub(
+        r"prompts_text = \["\
+        r"[\s]{0,}"\
+        r"maybe_apply_chat_template\(\{[\"\']prompt[\"\'][\s]{0,}\:[\s]{0,}prompt[\s]{0,}\}[\s]{0,}\,[\s]{0,}self\.processing_class\)"\
+        r"\[[\"\']prompt[\"\']\] for prompt in prompts"\
+        r"[\s]{0,}"\
+        r"\]",
+        replacement.replace("__INPUTS__REPLACEMENT__", "prompts"),
+        function,
+    )
+    return function
+pass
+RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer_fix_maybe_apply_chat_template)
+
+
 # Remove _move_model_to_vllm
 def grpo_trainer__move_model_to_vllm(function_name, function):
     if function_name != "_move_model_to_vllm": return function
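
The injected block's intent, distilled: any extra dataset key that the tokenizer's chat template actually references (such as reasoning_effort) is forwarded to maybe_apply_chat_template as a kwarg instead of being silently dropped. A standalone sketch of that filtering logic; the template string here is a made-up stand-in:

    _supported_keys_ = {"prompt", "chosen", "rejected", "completion", "messages", "label"}
    chat_template = "{% if reasoning_effort %}Reasoning: {{ reasoning_effort }}{% endif %}"

    example = {
        "prompt": [{"role": "user", "content": "What is 1+1?"}],
        "reasoning_effort": "high",  # extra column, previously ignored
    }
    tokenizer_kwargs = {}
    for k in example.keys() - _supported_keys_:
        v = example[k]
        if k in chat_template and type(v) is str:  # forward only template-referenced strings
            tokenizer_kwargs[k] = v
    print(tokenizer_kwargs)  # {'reasoning_effort': 'high'} -> passed to maybe_apply_chat_template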

unsloth/models/vision.py
Lines changed: 28 additions & 60 deletions
@@ -524,8 +524,8 @@ def from_pretrained(
         quantizer = AUTO_QUANTIZATION_CONFIG_MAPPING[quantization_config["quant_method"]]
         quantizer_kwargs = {}
         # We cannot dequantize since gpt-oss-20b MXFP4 will now be gpt-oss-20b-BF16
-        # if "dequantize" in inspect.signature(quantizer).parameters:
-        #     quantizer_kwargs["dequantize"] = True
+        if load_in_16bit and "dequantize" in inspect.signature(quantizer).parameters:
+            quantizer_kwargs["dequantize"] = True
         quantization_config = quantizer.from_dict(quantization_config, **quantizer_kwargs)
         kwargs["quantization_config"] = quantization_config
     pass
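
This hunk re-enables the previously commented-out dequantize path, but only when the caller explicitly requests 16-bit weights, so an MXFP4 checkpoint is not silently upcast. A runnable sketch of the gate, where Mxfp4Config is a hypothetical stand-in for the real quantization-config class:

    import inspect

    class Mxfp4Config:  # hypothetical stand-in
        def __init__(self, dequantize = False):
            self.dequantize = dequantize
        @classmethod
        def from_dict(cls, config_dict, **kwargs):
            return cls(**kwargs)

    load_in_16bit = True
    quantizer = Mxfp4Config
    quantizer_kwargs = {}
    # Only request MXFP4 -> BF16 dequantization when the caller wants 16-bit weights
    if load_in_16bit and "dequantize" in inspect.signature(quantizer).parameters:
        quantizer_kwargs["dequantize"] = True
    cfg = quantizer.from_dict({"quant_method": "mxfp4"}, **quantizer_kwargs)
    print(cfg.dequantize)  # True only for 16-bit loads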
@@ -549,7 +549,7 @@ def from_pretrained(
             # attn_implementation = attn_implementation,
             **kwargs,
         )
-        if hasattr(model, 'generate'):
+        if hasattr(model, "generate"):
             model.fast_generate = model.generate
             model.fast_generate_batches = error_out_no_vllm
         if offload_embedding:
@@ -612,8 +612,17 @@ def from_pretrained(
         llm = load_vllm(**load_vllm_kwargs)

         # Convert to HF format
-        _, quant_state_dict = get_vllm_state_dict(llm, config = model_config, is_vision_model = True)
-        model = convert_vllm_to_huggingface(quant_state_dict, model_config, dtype, bnb_config, is_vision_model = True)
+        _, quant_state_dict = get_vllm_state_dict(
+            llm,
+            config = model_config,
+            is_vision_model = True,
+        )
+        model = convert_vllm_to_huggingface(
+            quant_state_dict,
+            model_config,
+            dtype, bnb_config,
+            is_vision_model = True,
+        )
         model.vllm_engine = llm
         model.fast_generate = model.vllm_engine.generate
         model.fast_generate_batches = functools.partial(generate_batches, model.vllm_engine)
@@ -753,52 +762,6 @@ def from_pretrained(
         return model, tokenizer
     pass

-    @staticmethod
-    def pre_compile_for_inference(model_type, model, tokenizer):
-        """
-        We need to invoke torch.compile to save VRAM usage and make it faster downstream.
-        Sometimes torch.compile can use 3GB weirdly on large batches, then it goes down to <1GB.
-        So we invoke torch.compile on short batches to reduce VRAM usage.
-        """
-        if model_type is None or model is None or tokenizer is None: return
-        if str(model_type).lower() not in PRE_COMPILE_INFERENCE: return
-        if getattr(tokenizer, "chat_template", None) is None: return
-        # Check if already compiled and exit
-        for module in model.modules():
-            if hasattr(module, "_pre_compiled_for_inference"): return
-        pass
-        print(f"🦥 Unsloth: Pre compiling {model_type.title()} model for faster inference - this might take 3 minutes or so!")
-        print("========= Pre compiling model for faster inference. Please be patient thank you! =========")
-        # Do single inference
-        messages = [
-            [
-                {"role": "user", "content": f"What is 1+1 equal to?"},
-            ],
-        ]*1
-        inputs = tokenizer.apply_chat_template(
-            messages,
-            add_generation_prompt = True,
-            return_tensors = "pt",
-            return_dict = True,
-        ).to(model.device)
-        _ = model.generate(**inputs, max_new_tokens = 1)
-        # Do batched inference
-        messages = [
-            [
-                {"role": "user", "content": f"1+1"},
-            ],
-        ]*4
-        inputs = tokenizer.apply_chat_template(
-            messages,
-            add_generation_prompt = True,
-            return_tensors = "pt",
-            return_dict = True,
-        ).to(model.device)
-        _ = model.generate(**inputs, max_new_tokens = 2)
-        # Set we already pre compiled
-        model._pre_compiled_for_inference = True
-    pass
-
     @staticmethod
     def get_peft_model(
         model,
@@ -902,7 +865,11 @@ def get_peft_model(
         # Enable gradients on modules which are trainable
         requires_grad_for_gradient_checkpointing(model)
         trust_remote_code = getattr(model, "_unsloth_trust_remote_code", False)
-        model = FastBaseModel.post_patch_model(model, use_gradient_checkpointing, trust_remote_code = trust_remote_code)
+        model = FastBaseModel.post_patch_model(
+            model,
+            use_gradient_checkpointing = use_gradient_checkpointing,
+            trust_remote_code = trust_remote_code,
+        )
         model.max_seq_length = max_seq_length
         # Save to modules as well
         for module in model.modules():
@@ -998,14 +965,15 @@ def post_patch_model(
             m.for_inference = functools.partial(FastBaseModel.for_inference, m)
             m = m.model
         # Set weight[padding_idx] = 0
-        with torch.no_grad():
-            for name, module in model.named_modules():
-                if type(module) is torch.nn.Embedding:
-                    if getattr(module, "weight", None) is not None and getattr(module, "padding_idx", None) is not None:
-                        if module.padding_idx < module.weight.shape[0]:
-                            module.weight[module.padding_idx] = 0
-        # Patch for torch.compiled inference
-        # FastBaseModel.pre_compile_for_inference(model_type, model, tokenizer)
+        # Only do this if tokenizer is defined since eos_token == pad_token sometimes!
+        pad_token_id = getattr(tokenizer, "pad_token_id", None)
+        if tokenizer is not None and getattr(tokenizer, "eos_token_id", None) != pad_token_id:
+            with torch.no_grad():
+                for name, module in model.named_modules():
+                    if type(module) is torch.nn.Embedding:
+                        if getattr(module, "weight", None) is not None and getattr(module, "padding_idx", None) is not None:
+                            if module.padding_idx == pad_token_id and module.padding_idx < module.weight.shape[0]:
+                                module.weight[module.padding_idx] = 0
         return model
     pass
