
Commit 863630d

qgallouedec authored and SunMarc committed
Fix PAD/EOS/BOS (#18)
* fix pad/eos/bos
* base model maybe one day
1 parent c3c01f0 commit 863630d

File tree

2 files changed: +3 -12 lines changed


src/transformers/models/openai_moe/configuration_openai_moe.py

Lines changed: 0 additions & 6 deletions
```diff
@@ -75,9 +75,6 @@ def __init__(
         initializer_range: float = 0.02,
         max_position_embeddings=131072,
         rms_norm_eps: float = 1e-5,
-        pad_token_id: int = 0,
-        bos_token_id: int = 1,
-        eos_token_id: int = 2,
         rope_scaling={"rope_type": "yarn", "factor": 32.0, "beta_fast": 32.0, "beta_slow": 1.0, "truncate": False},
         attention_dropout: float = 0.0,
         num_experts_per_tok=4,
@@ -127,9 +124,6 @@ def __init__(
         self.output_router_logits = output_router_logits
         self.use_cache = use_cache
         super().__init__(
-            pad_token_id=pad_token_id,
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
             tie_word_embeddings=tie_word_embeddings,
             **kwargs,
         )
```
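
With the hard-coded defaults (pad=0, bos=1, eos=2) removed, special-token IDs only reach `PretrainedConfig.__init__` when a caller passes them through `**kwargs`. A minimal sketch of the resulting behavior, assuming the standard `PretrainedConfig` fallback of `None` (import path as in this commit's tree):

```python
from transformers.models.openai_moe.configuration_openai_moe import OpenAIMoeConfig

# No special-token kwargs: nothing is injected anymore, so the attributes
# fall back to the PretrainedConfig default of None.
config = OpenAIMoeConfig()
print(config.pad_token_id, config.bos_token_id, config.eos_token_id)  # None None None

# Callers that need an EOS id now pass it explicitly, as the conversion
# script in this commit does.
config = OpenAIMoeConfig(eos_token_id=199999)
print(config.eos_token_id)  # 199999
```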

src/transformers/models/openai_moe/convert_openai_weights_to_hf.py

Lines changed: 3 additions & 6 deletions
```diff
@@ -133,9 +133,7 @@ def write_model(
     mxfp4=False,
 ):
     os.makedirs(model_path, exist_ok=True)
-    bos_token_id = 128000
-    eos_token_id = 199999 if not instruct else [199999, 200018]
-    pad_token_id = 128004
+    eos_token_id = 199999 if not instruct else 200002
 
     original_config = json.loads((Path(input_base_path) / "config.json").read_text())
```

```diff
@@ -149,7 +147,7 @@ def write_model(
         "original_max_position_embeddings": 4096
     }
 
-    config = OpenAIMoeConfig(num_local_experts=num_local_experts, rope_scaling=rope_scaling, **original_config)
+    config = OpenAIMoeConfig(num_local_experts=num_local_experts, rope_scaling=rope_scaling, eos_token_id=eos_token_id, **original_config)
 
     print(f"Fetching all parameters from the checkpoint at {input_base_path}...")
     final_ = {}
```
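
Since the config class no longer supplies defaults, the converter threads its chosen `eos_token_id` into the constructor explicitly, leaving `pad_token_id` and `bos_token_id` unset. A sketch of that flow with stand-in values for what the real script reads from `config.json` (everything marked illustrative is not from the commit):

```python
from transformers.models.openai_moe.configuration_openai_moe import OpenAIMoeConfig

instruct = True                          # illustrative; a flag in the real script
original_config = {"hidden_size": 2880}  # illustrative stand-in for config.json
num_local_experts = 128                  # illustrative
rope_scaling = {"rope_type": "yarn", "factor": 32.0, "beta_fast": 32.0, "beta_slow": 1.0, "truncate": False}

# Same selection as the first hunk above: 199999 for base, 200002 for instruct.
eos_token_id = 199999 if not instruct else 200002

config = OpenAIMoeConfig(
    num_local_experts=num_local_experts,
    rope_scaling=rope_scaling,
    eos_token_id=eos_token_id,  # now explicit; pad/bos stay unset
    **original_config,
)
assert config.eos_token_id == 200002
```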
```diff
@@ -255,9 +253,7 @@ def write_model(
         do_sample=True,
         temperature=0.6,
         top_p=0.9,
-        bos_token_id=bos_token_id,
         eos_token_id=eos_token_id,
-        pad_token_id=pad_token_id,
     )
     generation_config.save_pretrained(model_path)
 
```
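The saved `GenerationConfig` now carries only the EOS id; the dropped `bos_token_id=128000` / `pad_token_id=128004` look like Llama-3 ids rather than anything in this tokenizer's vocabulary. A sketch using the public `transformers.GenerationConfig` API (the save path is illustrative):

```python
from transformers import GenerationConfig

generation_config = GenerationConfig(
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    eos_token_id=199999,  # 200002 for instruct checkpoints, per the selection above
)
# bos_token_id / pad_token_id now default to None instead of being pinned.
generation_config.save_pretrained("converted-model")  # illustrative; writes generation_config.json
```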

```diff
@@ -396,6 +392,7 @@ def __init__(
         kwargs["chat_template"] = chat_template
         self.tokenizer = PreTrainedTokenizerFast(
             tokenizer_object=tokenizer,
+            eos_token="<|return|>" if chat_template else "<|endoftext|>",
             model_input_names=["input_ids", "attention_mask"],
             model_max_length=model_max_length,
             **kwargs,
```
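
The tokenizer hunk keeps the string side in sync with the ids chosen above: `<|return|>` becomes EOS when a chat template is attached (instruct), `<|endoftext|>` otherwise. A quick way to sanity-check a converted checkpoint (the path is illustrative):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/converted-model")  # illustrative path
print(tok.eos_token)     # "<|return|>" (instruct) or "<|endoftext|>" (base)
print(tok.eos_token_id)  # expected to resolve to 200002 or 199999 respectively
```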
