@@ -133,9 +133,7 @@ def write_model(
     mxfp4=False,
 ):
     os.makedirs(model_path, exist_ok=True)
-    bos_token_id = 128000
-    eos_token_id = 199999 if not instruct else [199999, 200018]
-    pad_token_id = 128004
+    eos_token_id = 199999 if not instruct else 200002
 
     original_config = json.loads((Path(input_base_path) / "config.json").read_text())
 
@@ -149,7 +147,7 @@ def write_model(
149147 "original_max_position_embeddings" : 4096
150148 }
151149
152- config = OpenAIMoeConfig (num_local_experts = num_local_experts , rope_scaling = rope_scaling , ** original_config )
150+ config = OpenAIMoeConfig (num_local_experts = num_local_experts , rope_scaling = rope_scaling , eos_token_id = eos_token_id , ** original_config )
153151
154152 print (f"Fetching all parameters from the checkpoint at { input_base_path } ..." )
155153 final_ = {}
@@ -255,9 +253,7 @@ def write_model(
         do_sample=True,
         temperature=0.6,
         top_p=0.9,
-        bos_token_id=bos_token_id,
         eos_token_id=eos_token_id,
-        pad_token_id=pad_token_id,
     )
     generation_config.save_pretrained(model_path)
 
@@ -396,6 +392,7 @@ def __init__(
396392 kwargs ["chat_template" ] = chat_template
397393 self .tokenizer = PreTrainedTokenizerFast (
398394 tokenizer_object = tokenizer ,
395+ eos_token = "<|return|>" if chat_template else "<|endoftext|>" ,
399396 model_input_names = ["input_ids" , "attention_mask" ],
400397 model_max_length = model_max_length ,
401398 ** kwargs ,
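
For reference, a minimal sketch of how the new EOS handling can be sanity-checked after conversion, assuming the standard transformers API and a hypothetical local output directory (the path and print statements are illustrative only):

# Minimal sketch: verify that the saved generation config and tokenizer agree
# on the end-of-sequence token produced by the conversion script.
from transformers import AutoTokenizer, GenerationConfig

model_path = "./openai-moe-converted"  # hypothetical directory passed to write_model()

generation_config = GenerationConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Per the diff: instruct checkpoints use 200002 (<|return|>), base checkpoints
# keep 199999 (<|endoftext|>).
print("generation_config.eos_token_id:", generation_config.eos_token_id)
print("tokenizer.eos_token / id:", tokenizer.eos_token, tokenizer.eos_token_id)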