---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[4], line 2
1 # 1) 모델 불러오기
----> 2 model, tokenizer = FastLanguageModel.from_pretrained(
3 model_name = "Youseff1987/qwen-3-4b-instruct-bnb-4bit-lora-2",
4 )
File /usr/local/lib/python3.11/dist-packages/unsloth/models/loader.py:365, in FastLanguageModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, load_in_8bit, full_finetuning, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, use_exact_model_name, fast_inference, gpu_memory_utilization, float8_kv_cache, random_state, max_lora_rank, disable_log_stats, qat_scheme, *args, **kwargs)
348 dispatch_model = FastQwen3Model if model_type == "qwen3" else FastQwen3MoeModel
349 # elif model_type == "falcon_h1":
350 # dispatch_model = FastFalconH1Model
351 # if not SUPPORTS_FALCON_H1:
(...) 363 # dispatch_model = FastGraniteModel
364 else:
--> 365 return FastModel.from_pretrained(
366 model_name = old_model_name,
367 max_seq_length = max_seq_length,
368 dtype = dtype,
369 load_in_4bit = load_in_4bit,
370 load_in_8bit = load_in_8bit,
371 full_finetuning = full_finetuning,
372 token = token,
373 device_map = device_map,
374 rope_scaling = rope_scaling, # [TODO] No effect
375 fix_tokenizer = fix_tokenizer, # [TODO] No effect
376 trust_remote_code = trust_remote_code,
377 use_gradient_checkpointing = use_gradient_checkpointing,
378 resize_model_vocab = resize_model_vocab, # [TODO] No effect
379 revision = revision,
380 return_logits = False, # Return logits
381 fullgraph = True, # No graph breaks
382 use_exact_model_name = use_exact_model_name,
383
384 # Pass vLLM/inference parameters
385 fast_inference = fast_inference,
386 gpu_memory_utilization = gpu_memory_utilization,
387 float8_kv_cache = float8_kv_cache,
388 random_state = random_state,
389 max_lora_rank = max_lora_rank,
390 disable_log_stats = disable_log_stats,
391
392 *args, **kwargs,
393 )
394 pass
396 if use_gradient_checkpointing == "unsloth":
File /usr/local/lib/python3.11/dist-packages/unsloth/models/loader.py:825, in FastModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, load_in_8bit, full_finetuning, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, return_logits, fullgraph, use_exact_model_name, auto_model, whisper_language, whisper_task, unsloth_force_compile, fast_inference, gpu_memory_utilization, float8_kv_cache, random_state, max_lora_rank, disable_log_stats, qat_scheme, *args, **kwargs)
823 with redirector:
824 patch_loss_functions(torch_compile = False)
--> 825 model_types, supports_sdpa = unsloth_compile_transformers(
826 dtype = dtype,
827 model_name = model_name,
828 model_types = model_types,
829 token = token,
830 sdpa_dynamic_mask = True,
831 sdpa_bool_masks = True,
832 sdpa_gqa_replace = True,
833 sdpa_dynamic_compile = True,
834 compile_attention = True,
835 disable_causal_masks = True,
836 compile_torch_modules = True,
837 compile_custom_modules = True,
838 compile_function_calls = True,
839 fuse_lm_head = True,
840 gradient_checkpointing = True,
841 manual_replacements = True,
842 fast_lora_forwards = True,
843 fast_residual_stream = False,
844 accurate_accumulation = True,
845 epilogue_fusion = True,
846 max_autotune = False,
847 shape_padding = True,
848 cudagraphs = False,
849 debug = False,
850 fullgraph = fullgraph,
851 import_from_cache = False,
852 disable = False,
853 return_logits = return_logits,
854 trust_remote_code = trust_remote_code,
855 unsloth_force_compile = unsloth_force_compile,
856 )
857 pass
858 # Fix SDPA
File /usr/local/lib/python3.11/dist-packages/unsloth/models/_utils.py:1470, in unsloth_compile_transformers(dtype, model_name, model_types, token, revision, trust_remote_code, sdpa_dynamic_mask, sdpa_bool_masks, sdpa_gqa_replace, sdpa_dynamic_compile, compile_attention, disable_causal_masks, compile_torch_modules, compile_custom_modules, compile_function_calls, fuse_lm_head, gradient_checkpointing, manual_replacements, fast_lora_forwards, fast_residual_stream, accurate_accumulation, epilogue_fusion, max_autotune, shape_padding, cudagraphs, debug, fullgraph, import_from_cache, disable, return_logits, unsloth_force_compile)
1468 supports_sdpa = [True]
1469 for model_type in model_types:
-> 1470 _unsloth_compile_transformers(
1471 model_type,
1472 sdpa_dynamic_mask = sdpa_dynamic_mask,
1473 sdpa_bool_masks = sdpa_bool_masks,
1474 sdpa_gqa_replace = sdpa_gqa_replace,
1475 sdpa_dynamic_compile = sdpa_dynamic_compile,
1476 compile_attention = compile_attention,
1477 disable_causal_masks = disable_causal_masks,
1478 compile_torch_modules = compile_torch_modules,
1479 compile_custom_modules = compile_custom_modules,
1480 compile_function_calls = compile_function_calls,
1481 fuse_lm_head = fuse_lm_head,
1482 gradient_checkpointing = gradient_checkpointing,
1483 manual_replacements = manual_replacements,
1484 fast_lora_forwards = fast_lora_forwards,
1485 fast_residual_stream = fast_residual_stream,
1486 accurate_accumulation = accurate_accumulation,
1487 epilogue_fusion = epilogue_fusion,
1488 max_autotune = max_autotune,
1489 shape_padding = shape_padding,
1490 cudagraphs = cudagraphs,
1491 debug = debug,
1492 fullgraph = fullgraph,
1493 import_from_cache = import_from_cache,
1494 disable = disable,
1495 return_logits = return_logits,
1496 supports_sdpa = supports_sdpa,
1497 )
1498 pass
1499 # Redo patches which override compiler
File /usr/local/lib/python3.11/dist-packages/unsloth_zoo/compiler.py:2215, in unsloth_compile_transformers(model_type, sdpa_dynamic_mask, sdpa_bool_masks, sdpa_gqa_replace, sdpa_dynamic_compile, compile_attention, disable_causal_masks, compile_torch_modules, compile_custom_modules, compile_function_calls, fuse_lm_head, gradient_checkpointing, manual_replacements, fast_lora_forwards, fast_residual_stream, accurate_accumulation, epilogue_fusion, max_autotune, shape_padding, cudagraphs, debug, fullgraph, import_from_cache, disable, return_logits, supports_sdpa)
2213 if disable_causal_masks:
2214 for module in other_classes:
-> 2215 source = eval(f"{model_location}.{module}")
2216 if not hasattr(source, "_update_causal_mask"): continue
2218 try: source = inspect.getsource(source.__init__)
File <string>:1
AttributeError: module 'transformers.models.bit.modeling_bit' has no attribute 'Linear'
It seems that the LoRA adapter itself is not broken. If you first load the base model and then load the LoRA adapter through PEFT, it can be loaded and used normally.
↓↓ This works well for me. (But I want to be able to load the base model and the adapter together.)
When saving the LoRA adapter locally and loading it back, it works fine and can be loaded at once just like other models.
However, if I load the base model and the adapter separately, it causes issues: merging does not work, and additional SFT cannot proceed.
pip install --upgrade unsloth unsloth_zoo
Colab or Kaggle or local / cloud
nvidia-smi
Which Unsloth version, TRL version, transformers version, PyTorch version?
Unsloth version: 2025.9.7
TRL version: 0.22.2
Transformers version: 4.55.4
PyTorch version: 2.8.0+cu128
Which trainer?
SFTTrainer, GRPOTrainer, etc.
It seems that the LoRA adapter itself is not broken. If you first load the base model and then load the LoRA adapter through PEFT, it can be loaded and used normally.
↓↓ This works well for me. (But I want to be able to load the base model and the adapter together.)
When saving the LoRA adapter locally and loading it back, it works fine and can be loaded at once just like other models.
However, if I load the base model and the adapter separately, it causes issues: merging does not work, and additional SFT cannot proceed.