Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d15be12
examples : include examples in msvc disable warn (ggml/1270)
danbev Jun 12, 2025
44dbaa5
ggml : remove unused ggml_context_container (ggml/1272)
danbev Jun 13, 2025
0ea0bb8
ggml : disable warnings for tests when using MSVC (ggml/1273)
danbev Jun 13, 2025
9b850cf
rpc : nicer error messages for RPC server crash (llama/14076)
isaac-mcfadyen Jun 10, 2025
19f70d6
Vulkan: Don't default to CPU device (like llvmpipe), even if no other…
0cc4m Jun 10, 2025
ea295ba
opencl: add `mul_mv_id_q4_0_f32_8x_flat` (llama/14003)
lhez Jun 10, 2025
9de0db5
vulkan: Track descriptor pools/sets per-context (llama/14109)
jeffbolznv Jun 11, 2025
ada4c55
vulkan: Better thread-safety for command pools/buffers (llama/14116)
jeffbolznv Jun 11, 2025
fc447c1
Implement GGML_CPU_ALL_VARIANTS for ARM (llama/14080)
ckastner Jun 11, 2025
7bc2b17
cmake : handle whitepsaces in path during metal build (llama/14126)
ggerganov Jun 12, 2025
1e6693c
sycl: Remove not needed copy f16->f32 for dnnl mul mat (llama/14125)
ShanoToni Jun 12, 2025
c583825
SYCL: Bump oneMath commit (llama/14152)
Jun 13, 2025
28560f9
sycl: Adding additional cpy dbg print output (llama/14034)
ShanoToni Jun 13, 2025
39f31e3
HIP: Replace usage of depricated preprocessor macro __AMDGCN_WAVEFRON…
IMbackK Jun 15, 2025
6c721d6
CUDA/HIP: fix ssm_scan on devices where warp size is not 32 (llama/14…
IMbackK Jun 15, 2025
4640699
ggml-cpu : rework weak alias on apple targets (llama/14146)
xctan Jun 16, 2025
2c5f56b
vulkan: mutex around vkQueueSubmit (llama/14127)
jeffbolznv Jun 16, 2025
c404919
ggml: Add Android support for GGML_CPU_ALL_VARIANTS (llama/14206)
chaxu01 Jun 16, 2025
acb45d4
HIP: disable rocwmma on gfx12 by default until rocm 7.0 (llama/14202)
IMbackK Jun 16, 2025
91f8856
cmake: clean up external project logic for vulkan-shaders-gen (llama/…
mtmcp Jun 16, 2025
311fa20
llama : add thread safety test (llama/14035)
slaren Jun 16, 2025
2400bdb
musa: fix build warning (unused variable) (llama/14231)
yeahdongcn Jun 17, 2025
9686aec
ggml-cpu : remove the weak alias trick (llama/14221)
xctan Jun 17, 2025
ad8b05b
cmake: remove shader-gen step-targets from ggml-vulkan (llama/14226)
mtmcp Jun 17, 2025
a8d075d
sync : ggml
ggerganov Jun 18, 2025
9cc9701
talk-llama : sync llama.cpp
ggerganov Jun 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions examples/talk-llama/llama-arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_BERT, "bert" },
{ LLM_ARCH_NOMIC_BERT, "nomic-bert" },
{ LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
{ LLM_ARCH_NEO_BERT, "neo-bert" },
{ LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
{ LLM_ARCH_BLOOM, "bloom" },
{ LLM_ARCH_STABLELM, "stablelm" },
Expand Down Expand Up @@ -72,6 +73,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
{ LLM_ARCH_PLM, "plm" },
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
{ LLM_ARCH_DOTS1, "dots1" },
{ LLM_ARCH_ARCEE, "arcee" },
{ LLM_ARCH_UNKNOWN, "(unknown)" },
};

Expand Down Expand Up @@ -243,6 +246,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
},
},
{
LLM_ARCH_ARCEE,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
},
},
{
LLM_ARCH_LLAMA4,
{
Expand Down Expand Up @@ -494,6 +515,21 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
},
},
{
LLM_ARCH_NEO_BERT,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
{ LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
{ LLM_TENSOR_CLS, "cls" },
{ LLM_TENSOR_CLS_OUT, "cls.output" },
},
},
{
LLM_ARCH_JINA_BERT_V2,
{
Expand Down Expand Up @@ -1555,6 +1591,34 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
},
},
{
LLM_ARCH_DOTS1,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
{ LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
}
},
{
LLM_ARCH_UNKNOWN,
{
Expand Down
3 changes: 3 additions & 0 deletions examples/talk-llama/llama-arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum llm_arch {
LLM_ARCH_BERT,
LLM_ARCH_NOMIC_BERT,
LLM_ARCH_NOMIC_BERT_MOE,
LLM_ARCH_NEO_BERT,
LLM_ARCH_JINA_BERT_V2,
LLM_ARCH_BLOOM,
LLM_ARCH_STABLELM,
Expand Down Expand Up @@ -76,6 +77,8 @@ enum llm_arch {
LLM_ARCH_WAVTOKENIZER_DEC,
LLM_ARCH_PLM,
LLM_ARCH_BAILINGMOE,
LLM_ARCH_DOTS1,
LLM_ARCH_ARCEE,
LLM_ARCH_UNKNOWN,
};

Expand Down
Loading
Loading