@@ -7950,119 +7950,6 @@ def set_vocab(self):
             self.gguf_writer.add_chat_template(chat_template)


-@ModelBase.register("GptOssForCausalLM")
-class GptOssModel(TextModel):
-    model_arch = gguf.MODEL_ARCH.GPT_OSS
-
-    def transform_nibble_layout(self, tensor):
-        assert tensor.dtype == torch.uint8
-        assert tensor.shape[-1] == 16
-        # swap nibbles
-        t_lo = tensor & 0x0F
-        t_hi = tensor & 0xF0
-        t_swapped = (t_lo << 4) | (t_hi >> 4)
-        tensor = t_swapped
-        # transform aaaa...bbbb... to abababab...
-        blk_a, blk_b = tensor.chunk(2, dim=-1)
-        # get a_
-        blk_a0 = (blk_a & 0xF0).view(-1, 1)
-        blk_a1 = (blk_a << 4).view(-1, 1)
-        blk_a = torch.stack((blk_a0, blk_a1), dim=2).view(tensor.shape)
-        # get _b
-        blk_b0 = (blk_b >> 4).view(-1, 1)
-        blk_b1 = (blk_b & 0x0F).view(-1, 1)
-        blk_b = torch.stack((blk_b0, blk_b1), dim=2).view(tensor.shape)
-        # swap once more
-        out = blk_a | blk_b
-        out_h = out & 0xF0
-        out_l = out & 0x0F
-        out = (out_h >> 4) | (out_l << 4)
-        return out
-
-    def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
-        assert blocks.dtype == torch.uint8
-        assert scales.dtype == torch.uint8
-        scales = scales.unsqueeze(-1)
-        assert len(blocks.shape) == 4
-        assert len(scales.shape) == 4
-        blocks = self.transform_nibble_layout(blocks)
-        new_data = torch.concat((scales, blocks), dim=-1)
-        new_shape = [new_data.shape[0], new_data.shape[1], new_data.shape[2] * 32]
-        logger.info(f"Repacked {new_name} with shape {new_shape} and quantization MXFP4")
-        # flatten last dim
-        new_data = new_data.view(new_data.shape[0], new_data.shape[1], new_data.shape[2] * new_data.shape[3])
-        new_data = new_data.numpy()
-        self.gguf_writer.add_tensor(new_name, new_data, raw_dtype=gguf.GGMLQuantizationType.MXFP4)
-
-    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
-        blocks0: Tensor = torch.zeros(1)
-        blocks1: Tensor = torch.zeros(1)
-        found_mxfp4_tensors = False
-        # we assume that tensors are loaded in the correct order
-        for name, data_torch in self.get_tensors():
-            if "mlp.experts.down_proj_blocks" in name:
-                blocks0 = data_torch
-            elif "mlp.experts.down_proj_scales" in name:
-                new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
-                self.repack_mxfp4(new_name, blocks0, data_torch)
-                found_mxfp4_tensors = True
-            elif "mlp.experts.gate_up_proj_blocks" in name:
-                blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
-            elif "mlp.experts.gate_up_proj_scales" in name:
-                scales0, scales1 = data_torch[:, ::2, :], data_torch[:, 1::2, :]
-                new_name_gate = self.map_tensor_name(name.replace("gate_up_proj_scales", "gate_proj.weight"))
-                new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
-                self.repack_mxfp4(new_name_gate, blocks0, scales0)
-                self.repack_mxfp4(new_name_up, blocks1, scales1)
-                found_mxfp4_tensors = True
-        if not found_mxfp4_tensors:
-            raise ValueError("No MXFP4 tensors found in the model. Please make sure you are using MXFP4 model.")
-        return []
-
-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        del bid  # unused
-
-        if "sinks" in name:
-            name += ".weight"
-
-        # correct naming for down_proj
-        if "down_proj" in name:
-            if name.endswith("_bias"):
-                name = name.replace("down_proj_bias", "down_proj.bias")
-            else:
-                return []
-
-        # split the gate_up into gate and up
-        if "gate_up_proj" in name:
-            if name.endswith("_bias"):
-                name_up = name.replace("gate_up_proj_bias", "up_proj.bias")
-                name_gate = name.replace("gate_up_proj_bias", "gate_proj.bias")
-                gate_proj_bias, up_proj_bias = data_torch[..., ::2], data_torch[..., 1::2]
-                return [
-                    (self.map_tensor_name(name_gate), gate_proj_bias),
-                    (self.map_tensor_name(name_up), up_proj_bias)
-                ]
-            else:
-                return []
-
-        return [(self.map_tensor_name(name), data_torch)]
-
-    def set_vocab(self):
-        self._set_vocab_gpt2()
-
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-        self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
-        self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size"])
-
-        rope_scaling = self.hparams.get("rope_scaling") or {}
-        rope_type = rope_scaling.get("rope_type", rope_scaling.get("type"))
-        assert rope_type == "yarn", f"GPT-OSS only supports yarn rope scaling, got {rope_type}"
-        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-        self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
-        self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling.get("original_max_position_embeddings", 4096))
-
-
 @ModelBase.register("Lfm2ForCausalLM")
 @ModelBase.register("LFM2ForCausalLM")
 class LFM2Model(TextModel):
@@ -8202,7 +8089,6 @@ class LazyTorchTensor(gguf.LazyBase):
     _dtype_map: dict[torch.dtype, type] = {
         torch.float16: np.float16,
         torch.float32: np.float32,
-        torch.uint8: np.uint8,
     }

     # used for safetensors slices
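
Note (not part of the patch): the nibble reordering done by the removed transform_nibble_layout can be restated as a standalone NumPy sketch for readers who want to follow the bit manipulation outside torch. The function name and the NumPy port are assumptions for illustration; the original operates on torch uint8 tensors whose last dimension is 16 bytes (one MXFP4 block of 32 packed 4-bit values), and, per its own comment, turns an aaaa...bbbb... nibble layout into the interleaved ababab... layout before the block is written out as GGML MXFP4 data.

# Illustrative sketch only: NumPy re-statement of the removed torch-based
# transform_nibble_layout; not part of this commit.
import numpy as np

def transform_nibble_layout_np(blocks: np.ndarray) -> np.ndarray:
    # blocks: uint8, last dim = 16 bytes = 32 packed 4-bit values per MXFP4 block
    assert blocks.dtype == np.uint8 and blocks.shape[-1] == 16
    # swap the two nibbles of every byte
    swapped = ((blocks & 0x0F) << 4) | ((blocks & 0xF0) >> 4)
    # split each 16-byte block into its halves: aaaa... and bbbb...
    blk_a, blk_b = np.split(swapped, 2, axis=-1)
    # place the "a" nibbles in the high half of each output byte ...
    a = np.stack(((blk_a & 0xF0).reshape(-1, 1),
                  (blk_a << 4).reshape(-1, 1)), axis=2).reshape(blocks.shape)
    # ... and the "b" nibbles in the low half, interleaving them as ababab...
    b = np.stack(((blk_b >> 4).reshape(-1, 1),
                  (blk_b & 0x0F).reshape(-1, 1)), axis=2).reshape(blocks.shape)
    out = a | b
    # final nibble swap, so the byte ordering matches what the GGML MXFP4 layout expects
    return ((out & 0xF0) >> 4) | ((out & 0x0F) << 4)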