Skip to content

Commit 04fcd78

Browse files
authored
Merge pull request #6659 from menloresearch/fix/6626
fix: Improve KV cache estimation robustness
2 parents d315522 + 34b254e commit 04fcd78

File tree

1 file changed

+31
-2
lines changed
  • src-tauri/plugins/tauri-plugin-llamacpp/src/gguf

1 file changed

+31
-2
lines changed

src-tauri/plugins/tauri-plugin-llamacpp/src/gguf/utils.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ pub async fn estimate_kv_cache_internal(
6262
ctx_size: Option<u64>,
6363
) -> Result<KVCacheEstimate, KVCacheError> {
6464
log::info!("Received ctx_size parameter: {:?}", ctx_size);
65+
log::info!("Received model metadata:\n{:?}", &meta);
6566
let arch = meta
6667
.get("general.architecture")
6768
.ok_or(KVCacheError::ArchitectureNotFound)?;
@@ -94,15 +95,43 @@ pub async fn estimate_kv_cache_internal(
9495
let key_len_key = format!("{}.attention.key_length", arch);
9596
let val_len_key = format!("{}.attention.value_length", arch);
9697

97-
let key_len = meta
98+
let mut key_len = meta
9899
.get(&key_len_key)
99100
.and_then(|s| s.parse::<u64>().ok())
100101
.unwrap_or(0);
101-
let val_len = meta
102+
let mut val_len = meta
102103
.get(&val_len_key)
103104
.and_then(|s| s.parse::<u64>().ok())
104105
.unwrap_or(0);
105106

107+
// Fallback: derive both from embedding_length if either key/val length is missing, unparsable, or zero
108+
if key_len == 0 || val_len == 0 {
109+
let emb_len_key = format!("{}.embedding_length", arch);
110+
let emb_len = meta
111+
.get(&emb_len_key)
112+
.and_then(|s| s.parse::<u64>().ok())
113+
.unwrap_or(0);
114+
115+
if emb_len > 0 && n_head > 0 {
116+
// For most transformers: head_dim = embedding_length / total_heads
117+
let total_heads = meta
118+
.get(&n_head_key)
119+
.and_then(|s| s.parse::<u64>().ok())
120+
.unwrap_or(n_head);
121+
122+
let head_dim = emb_len / total_heads;
123+
key_len = head_dim;
124+
val_len = head_dim;
125+
126+
log::info!(
127+
"Calculated key_len and val_len from embedding_length: {} / {} heads = {} per head",
128+
emb_len,
129+
total_heads,
130+
head_dim
131+
);
132+
}
133+
}
134+
106135
if key_len == 0 || val_len == 0 {
107136
return Err(KVCacheError::EmbeddingLengthInvalid);
108137
}

0 commit comments

Comments
 (0)