@@ -37,6 +37,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_QWEN3NEXT,  "qwen3next"  },
     { LLM_ARCH_QWEN3VL,    "qwen3vl"    },
     { LLM_ARCH_QWEN3VLMOE, "qwen3vlmoe" },
+    { LLM_ARCH_QWEN35,     "qwen35"     },
+    { LLM_ARCH_QWEN35MOE,  "qwen35moe"  },
     { LLM_ARCH_PHI2,       "phi2"       },
     { LLM_ARCH_PHI3,       "phi3"       },
     { LLM_ARCH_PHIMOE,     "phimoe"     },
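
LLM_ARCH_NAMES is the enum-to-string table for the architecture identifier stored in a GGUF file, so the new entries also round-trip through the reverse lookup. A minimal sketch, assuming the llm_arch_from_string helper declared in llama-arch.h (which scans LLM_ARCH_NAMES and falls back to LLM_ARCH_UNKNOWN on a miss):

    // resolving the string read from a GGUF's general.architecture
    // key back to the enum value
    llm_arch arch = llm_arch_from_string("qwen35");
    GGML_ASSERT(arch == LLM_ARCH_QWEN35);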
@@ -72,6 +74,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_CHATGLM,   "chatglm"   },
     { LLM_ARCH_GLM4,      "glm4"      },
     { LLM_ARCH_GLM4_MOE,  "glm4moe"   },
+    { LLM_ARCH_GLM_DSA,   "glm-dsa"   },
     { LLM_ARCH_BITNET,    "bitnet"    },
     { LLM_ARCH_T5,        "t5"        },
     { LLM_ARCH_T5ENCODER, "t5encoder" },
@@ -195,6 +198,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EMBEDDING_SCALE,            "%s.embedding_scale"            },
     { LLM_KV_TOKEN_SHIFT_COUNT,          "%s.token_shift_count"          },
     { LLM_KV_INTERLEAVE_MOE_LAYER_STEP,  "%s.interleave_moe_layer_step"  },
+    { LLM_KV_FULL_ATTENTION_INTERVAL,    "%s.full_attention_interval"    },

     { LLM_KV_ATTENTION_HEAD_COUNT,       "%s.attention.head_count"       },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV,    "%s.attention.head_count_kv"    },
@@ -222,6 +226,9 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_TEMPERATURE_SCALE,   "%s.attention.temperature_scale"   },
     { LLM_KV_ATTENTION_KEY_LENGTH_MLA,      "%s.attention.key_length_mla"      },
     { LLM_KV_ATTENTION_VALUE_LENGTH_MLA,    "%s.attention.value_length_mla"    },
+    { LLM_KV_ATTENTION_INDEXER_HEAD_COUNT,  "%s.attention.indexer.head_count"  },
+    { LLM_KV_ATTENTION_INDEXER_KEY_LENGTH,  "%s.attention.indexer.key_length"  },
+    { LLM_KV_ATTENTION_INDEXER_TOP_K,       "%s.attention.indexer.top_k"       },

     { LLM_KV_ROPE_DIMENSION_COUNT,          "%s.rope.dimension_count"          },
     { LLM_KV_ROPE_DIMENSION_SECTIONS,       "%s.rope.dimension_sections"       },
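
Each LLM_KV_NAMES entry is a printf-style template whose %s placeholder is filled with the architecture name from LLM_ARCH_NAMES, so the same metadata key is namespaced per architecture. A minimal illustration of the expansion (plain snprintf here; in llama.cpp the actual lookup goes through the LLM_KV functor):

    #include <cstdio>

    char key[128];
    std::snprintf(key, sizeof(key), "%s.attention.indexer.top_k", "glm-dsa");
    // key now holds "glm-dsa.attention.indexer.top_k"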
@@ -366,6 +373,7 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
     { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
     { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt"     },
     { LLM_TENSOR_SSM_BETA_ALPHA,  "blk.%d.ssm_ba"     },
+    { LLM_TENSOR_SSM_ALPHA,       "blk.%d.ssm_alpha"  },
     { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in"     },
     { LLM_TENSOR_SSM_NORM,        "blk.%d.ssm_norm"   },
     { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out"    },
@@ -512,6 +520,10 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
     { LLM_TENSOR_VISEXP_FFN_GATE,   "blk.%d.vis_gate"          },
     { LLM_TENSOR_VISEXP_FFN_DOWN,   "blk.%d.vis_down"          },
     { LLM_TENSOR_VISEXP_FFN_UP,     "blk.%d.vis_up"            },
+    { LLM_TENSOR_INDEXER_K_NORM,    "blk.%d.indexer.k_norm"    },
+    { LLM_TENSOR_INDEXER_PROJ,      "blk.%d.indexer.proj"      },
+    { LLM_TENSOR_INDEXER_ATTN_K,    "blk.%d.indexer.attn_k"    },
+    { LLM_TENSOR_INDEXER_ATTN_Q_B,  "blk.%d.indexer.attn_q_b"  },
 };

 static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
@@ -968,7 +980,6 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_ATTN_OUT,
                 LLM_TENSOR_ATTN_QKV,
                 LLM_TENSOR_ATTN_GATE,
-                LLM_TENSOR_FFN_NORM,
                 LLM_TENSOR_FFN_GATE_INP,
                 LLM_TENSOR_FFN_GATE_EXPS,
                 LLM_TENSOR_FFN_DOWN_EXPS,
@@ -985,6 +996,63 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_SSM_NORM,
                 LLM_TENSOR_SSM_OUT,
             };
+        case LLM_ARCH_QWEN35:
+            return {
+                LLM_TENSOR_TOKEN_EMBD,
+                LLM_TENSOR_OUTPUT_NORM,
+                LLM_TENSOR_OUTPUT,
+                LLM_TENSOR_ATTN_NORM,
+                LLM_TENSOR_ATTN_POST_NORM,
+                LLM_TENSOR_ATTN_Q,
+                LLM_TENSOR_ATTN_Q_NORM,
+                LLM_TENSOR_ATTN_K,
+                LLM_TENSOR_ATTN_K_NORM,
+                LLM_TENSOR_ATTN_V,
+                LLM_TENSOR_ATTN_OUT,
+                LLM_TENSOR_ATTN_QKV,
+                LLM_TENSOR_ATTN_GATE,
+                LLM_TENSOR_FFN_GATE,
+                LLM_TENSOR_FFN_DOWN,
+                LLM_TENSOR_FFN_UP,
+                LLM_TENSOR_SSM_A_NOSCAN,
+                LLM_TENSOR_SSM_CONV1D,
+                LLM_TENSOR_SSM_DT,
+                LLM_TENSOR_SSM_BETA,
+                LLM_TENSOR_SSM_ALPHA,
+                LLM_TENSOR_SSM_NORM,
+                LLM_TENSOR_SSM_OUT,
+            };
+        case LLM_ARCH_QWEN35MOE:
+            return {
+                LLM_TENSOR_TOKEN_EMBD,
+                LLM_TENSOR_OUTPUT_NORM,
+                LLM_TENSOR_OUTPUT,
+                LLM_TENSOR_ATTN_NORM,
+                LLM_TENSOR_ATTN_POST_NORM,
+                LLM_TENSOR_ATTN_Q,
+                LLM_TENSOR_ATTN_Q_NORM,
+                LLM_TENSOR_ATTN_K,
+                LLM_TENSOR_ATTN_K_NORM,
+                LLM_TENSOR_ATTN_V,
+                LLM_TENSOR_ATTN_OUT,
+                LLM_TENSOR_ATTN_QKV,
+                LLM_TENSOR_ATTN_GATE,
+                LLM_TENSOR_FFN_GATE_INP,
+                LLM_TENSOR_FFN_GATE_EXPS,
+                LLM_TENSOR_FFN_DOWN_EXPS,
+                LLM_TENSOR_FFN_UP_EXPS,
+                LLM_TENSOR_FFN_GATE_INP_SHEXP,
+                LLM_TENSOR_FFN_GATE_SHEXP,
+                LLM_TENSOR_FFN_DOWN_SHEXP,
+                LLM_TENSOR_FFN_UP_SHEXP,
+                LLM_TENSOR_SSM_A_NOSCAN,
+                LLM_TENSOR_SSM_CONV1D,
+                LLM_TENSOR_SSM_DT,
+                LLM_TENSOR_SSM_BETA,
+                LLM_TENSOR_SSM_ALPHA,
+                LLM_TENSOR_SSM_NORM,
+                LLM_TENSOR_SSM_OUT,
+            };
         case LLM_ARCH_QWEN3VL:
         case LLM_ARCH_CHAMELEON:
         case LLM_ARCH_HUNYUAN_DENSE:
@@ -1597,6 +1665,46 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
             LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
             LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
         };
+        case LLM_ARCH_GLM_DSA:
+            return {
+                LLM_TENSOR_TOKEN_EMBD,
+                LLM_TENSOR_OUTPUT_NORM,
+                LLM_TENSOR_OUTPUT,
+                LLM_TENSOR_ATTN_NORM,
+                LLM_TENSOR_ATTN_Q_A_NORM,
+                LLM_TENSOR_ATTN_KV_A_NORM,
+                LLM_TENSOR_ATTN_Q,
+                LLM_TENSOR_ATTN_Q_A,
+                LLM_TENSOR_ATTN_Q_B,
+                LLM_TENSOR_ATTN_KV_A_MQA,
+                LLM_TENSOR_ATTN_KV_B,
+                LLM_TENSOR_ATTN_K_B,
+                LLM_TENSOR_ATTN_V_B,
+                LLM_TENSOR_ATTN_OUT,
+                LLM_TENSOR_FFN_NORM,
+                LLM_TENSOR_FFN_GATE,
+                LLM_TENSOR_FFN_UP,
+                LLM_TENSOR_FFN_DOWN,
+                LLM_TENSOR_FFN_GATE_INP,
+                LLM_TENSOR_FFN_GATE_EXPS,
+                LLM_TENSOR_FFN_DOWN_EXPS,
+                LLM_TENSOR_FFN_UP_EXPS,
+                LLM_TENSOR_FFN_GATE_INP_SHEXP,
+                LLM_TENSOR_FFN_GATE_SHEXP,
+                LLM_TENSOR_FFN_DOWN_SHEXP,
+                LLM_TENSOR_FFN_UP_SHEXP,
+                LLM_TENSOR_FFN_EXP_PROBS_B,
+                LLM_TENSOR_INDEXER_K_NORM,
+                LLM_TENSOR_INDEXER_PROJ,
+                LLM_TENSOR_INDEXER_ATTN_K,
+                LLM_TENSOR_INDEXER_ATTN_Q_B,
+                LLM_TENSOR_NEXTN_EH_PROJ,
+                LLM_TENSOR_NEXTN_EMBED_TOKENS,
+                LLM_TENSOR_NEXTN_ENORM,
+                LLM_TENSOR_NEXTN_HNORM,
+                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
+                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
+            };
         case LLM_ARCH_BITNET:
             return {
                 LLM_TENSOR_TOKEN_EMBD,
@@ -2456,6 +2564,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_SSM_X,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_DT,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_OUT,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_SSM_ALPHA,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_BETA_ALPHA,  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_W1,     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_W2,     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
@@ -2582,6 +2691,10 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_VISEXP_FFN_GATE,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_VISEXP_FFN_DOWN,   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_VISEXP_FFN_UP,     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_INDEXER_K_NORM,    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_INDEXER_PROJ,      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_INDEXER_ATTN_K,    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_INDEXER_ATTN_Q_B,  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     // NextN/MTP tensors are currently ignored (reserved for future MTP support)
     // These tensors only exist in the last layer(s) and are treated as output tensors
     {LLM_TENSOR_NEXTN_EH_PROJ,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
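
Each llm_tensor_info entry pairs the tensor's layer class (per-block repeating vs. input/output) with the ggml op that ultimately consumes the weight, which the loader consults when checking whether a backend buffer type can host the tensor. Note that LLM_TENSOR_INDEXER_K_NORM is registered with GGML_OP_MUL, matching the other norm weights, which are applied by elementwise multiply rather than a matmul. A sketch of a lookup, assuming the layer/op field names from llama-arch.h:

    const llm_tensor_info & info = LLM_TENSOR_INFOS.at(LLM_TENSOR_INDEXER_PROJ);
    GGML_ASSERT(info.layer == LLM_TENSOR_LAYER_REPEATING);
    GGML_ASSERT(info.op    == GGML_OP_MUL_MAT);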
@@ -2675,6 +2788,8 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
         case LLM_ARCH_NEMOTRON_H_MOE:
         case LLM_ARCH_QWEN3NEXT:
         case LLM_ARCH_KIMI_LINEAR:
+        case LLM_ARCH_QWEN35:
+        case LLM_ARCH_QWEN35MOE:
             return true;
         default:
             return false;
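
Adding QWEN35 and QWEN35MOE here flags them as hybrid architectures, presumably because, like QWEN3NEXT above, they interleave full-attention blocks with recurrent (SSM-style) blocks; the new %s.full_attention_interval key points the same way. A hypothetical call site, assuming the usual guard pattern:

    if (llm_arch_is_hybrid(arch)) {
        // allocate both a KV cache for the full-attention layers and
        // per-layer recurrent state for the linear-attention/SSM layers
    }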