@@ -88,6 +88,7 @@ class MODEL_ARCH(IntEnum):
     PERSIMMON : int = auto()
     REFACT    : int = auto()
     BERT      : int = auto()
+    PLAMO     : int = auto()


 class MODEL_TENSOR(IntEnum):
@@ -125,6 +126,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.PERSIMMON: "persimmon",
     MODEL_ARCH.REFACT:    "refact",
     MODEL_ARCH.BERT:      "bert",
+    MODEL_ARCH.PLAMO:     "plamo",
 }

 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -282,6 +284,21 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PLAMO: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.GPT2: [
         # TODO
     ],
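
The two constants hunks above register the new architecture in the same lookup tables the other architectures use: the enum value, its "plamo" name string, and the list of tensor kinds it declares. A minimal sketch of inspecting that registration, assuming the patched gguf-py is importable and that the tables are exposed as MODEL_ARCH_NAMES and MODEL_TENSORS as in the surrounding file:

    # Minimal sketch: enumerate what the PLaMo registration above declares.
    # Assumes the patched gguf-py package is on the path; table names follow
    # the conventions of this constants module.
    from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSORS

    arch = MODEL_ARCH.PLAMO
    print(MODEL_ARCH_NAMES[arch])        # expected: "plamo"
    for tensor in MODEL_TENSORS[arch]:   # TOKEN_EMBD, OUTPUT_NORM, OUTPUT, ...
        print(tensor.name)
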
@@ -366,6 +383,7 @@ class TensorNameMap:
366383 "layers.{bid}.attention_norm" , # llama-pth
367384 "encoder.layer.{bid}.attention.output.LayerNorm" , # bert
368385 "language_model.encoder.layers.{bid}.input_layernorm" , # persimmon
386+ "model.layers.layers.{bid}.norm" , # plamo
369387 ),
370388
371389 # Attention norm 2
@@ -384,45 +402,50 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",       # llama-hf
-            "layers.{bid}.attention.wq",                 # llama-pth
-            "encoder.layer.{bid}.attention.self.query",  # bert
-            "transformer.h.{bid}.attn.q_proj",           # gpt-j
+            "model.layers.{bid}.self_attn.q_proj",         # llama-hf
+            "layers.{bid}.attention.wq",                   # llama-pth
+            "encoder.layer.{bid}.attention.self.query",    # bert
+            "transformer.h.{bid}.attn.q_proj",             # gpt-j
+            "model.layers.layers.{bid}.self_attn.q_proj",  # plamo
         ),

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",       # llama-hf
-            "layers.{bid}.attention.wk",                 # llama-pth
-            "encoder.layer.{bid}.attention.self.key",    # bert
-            "transformer.h.{bid}.attn.k_proj",           # gpt-j
+            "model.layers.{bid}.self_attn.k_proj",         # llama-hf
+            "layers.{bid}.attention.wk",                   # llama-pth
+            "encoder.layer.{bid}.attention.self.key",      # bert
+            "transformer.h.{bid}.attn.k_proj",             # gpt-j
+            "model.layers.layers.{bid}.self_attn.k_proj",  # plamo
         ),

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",       # llama-hf
-            "layers.{bid}.attention.wv",                 # llama-pth
-            "encoder.layer.{bid}.attention.self.value",  # bert
-            "transformer.h.{bid}.attn.v_proj",           # gpt-j
+            "model.layers.{bid}.self_attn.v_proj",         # llama-hf
+            "layers.{bid}.attention.wv",                   # llama-pth
+            "encoder.layer.{bid}.attention.self.value",    # bert
+            "transformer.h.{bid}.attn.v_proj",             # gpt-j
+            "model.layers.layers.{bid}.self_attn.v_proj",  # plamo
         ),

         # Attention output
         MODEL_TENSOR.ATTN_OUT: (
-            "gpt_neox.layers.{bid}.attention.dense",                    # gptneox
-            "transformer.h.{bid}.attn.c_proj",                          # gpt2 refact
-            "transformer.blocks.{bid}.attn.out_proj",                   # mpt
-            "transformer.h.{bid}.self_attention.dense",                 # falcon
-            "model.layers.{bid}.self_attn.o_proj",                      # llama-hf
-            "layers.{bid}.attention.wo",                                # llama-pth
-            "encoder.layer.{bid}.attention.output.dense",               # bert
-            "transformer.h.{bid}.attn.out_proj",                        # gpt-j
-            "language_model.encoder.layers.{bid}.self_attention.dense"  # persimmon
+            "gpt_neox.layers.{bid}.attention.dense",                     # gptneox
+            "transformer.h.{bid}.attn.c_proj",                           # gpt2 refact
+            "transformer.blocks.{bid}.attn.out_proj",                    # mpt
+            "transformer.h.{bid}.self_attention.dense",                  # falcon
+            "model.layers.{bid}.self_attn.o_proj",                       # llama-hf
+            "layers.{bid}.attention.wo",                                 # llama-pth
+            "encoder.layer.{bid}.attention.output.dense",                # bert
+            "transformer.h.{bid}.attn.out_proj",                         # gpt-j
+            "language_model.encoder.layers.{bid}.self_attention.dense",  # persimmon
+            "model.layers.layers.{bid}.self_attn.o_proj",                # plamo
         ),

         # Rotary embeddings
         MODEL_TENSOR.ATTN_ROT_EMBD: (
-            "model.layers.{bid}.self_attn.rotary_emb.inv_freq",   # llama-hf
-            "layers.{bid}.attention.inner_attention.rope.freqs",  # llama-pth
+            "model.layers.{bid}.self_attn.rotary_emb.inv_freq",         # llama-hf
+            "layers.{bid}.attention.inner_attention.rope.freqs",        # llama-pth
+            "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq",  # plamo
         ),

         # Feed-forward norm
@@ -447,12 +470,14 @@ class TensorNameMap:
447470 "encoder.layer.{bid}.intermediate.dense" , # bert
448471 "transformer.h.{bid}.mlp.fc_in" , # gpt-j
449472 "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h" , # persimmon
473+ "model.layers.layers.{bid}.mlp.up_proj" , # plamo
450474 ),
451475
452476 # Feed-forward gate
453477 MODEL_TENSOR .FFN_GATE : (
454- "model.layers.{bid}.mlp.gate_proj" , # llama-hf refact
455- "layers.{bid}.feed_forward.w1" , # llama-pth
478+ "model.layers.{bid}.mlp.gate_proj" , # llama-hf refact
479+ "layers.{bid}.feed_forward.w1" , # llama-pth
480+ "model.layers.layers.{bid}.mlp.gate_proj" , # plamo
456481 ),
457482
458483 # Feed-forward down
@@ -466,6 +491,7 @@ class TensorNameMap:
466491 "encoder.layer.{bid}.output.dense" , # bert
467492 "transformer.h.{bid}.mlp.fc_out" , # gpt-j
468493 "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h" , # persimmon
494+ "model.layers.layers.{bid}.mlp.down_proj" , # plamo
469495 ),
470496
471497 MODEL_TENSOR .ATTN_Q_NORM : (
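
Taken together, the mapping entries added above let the conversion tooling translate PLaMo's doubly nested Hugging Face tensor names ("model.layers.layers.{bid}...") into the standard GGUF names. A minimal sketch of how that resolution would look, assuming gguf-py's get_tensor_name_map() helper and TensorNameMap.get_name() as used by the convert scripts; the block count of 40 is a placeholder, not PLaMo's actual depth:

    # Minimal sketch: resolve a PLaMo checkpoint tensor name to its GGUF name.
    # Assumes the patched gguf-py; block_count is a placeholder value.
    import gguf
    from gguf.constants import MODEL_ARCH

    block_count = 40
    name_map = gguf.get_tensor_name_map(MODEL_ARCH.PLAMO, block_count)

    hf_name = "model.layers.layers.0.self_attn.q_proj.weight"
    gguf_name = name_map.get_name(hf_name, try_suffixes=(".weight", ".bias"))
    print(gguf_name)   # expected: "blk.0.attn_q.weight"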