@@ -62,6 +62,7 @@ pub async fn estimate_kv_cache_internal(
6262 ctx_size : Option < u64 > ,
6363) -> Result < KVCacheEstimate , KVCacheError > {
6464 log:: info!( "Received ctx_size parameter: {:?}" , ctx_size) ;
65+ log:: info!( "Received model metadata:\n {:?}" , & meta) ;
6566 let arch = meta
6667 . get ( "general.architecture" )
6768 . ok_or ( KVCacheError :: ArchitectureNotFound ) ?;
@@ -94,15 +95,43 @@ pub async fn estimate_kv_cache_internal(
9495 let key_len_key = format ! ( "{}.attention.key_length" , arch) ;
9596 let val_len_key = format ! ( "{}.attention.value_length" , arch) ;
9697
97- let key_len = meta
98+ let mut key_len = meta
9899 . get ( & key_len_key)
99100 . and_then ( |s| s. parse :: < u64 > ( ) . ok ( ) )
100101 . unwrap_or ( 0 ) ;
101- let val_len = meta
102+ let mut val_len = meta
102103 . get ( & val_len_key)
103104 . and_then ( |s| s. parse :: < u64 > ( ) . ok ( ) )
104105 . unwrap_or ( 0 ) ;
105106
107+ // Fallback: calculate from embedding_length if key/val lengths not found
108+ if key_len == 0 || val_len == 0 {
109+ let emb_len_key = format ! ( "{}.embedding_length" , arch) ;
110+ let emb_len = meta
111+ . get ( & emb_len_key)
112+ . and_then ( |s| s. parse :: < u64 > ( ) . ok ( ) )
113+ . unwrap_or ( 0 ) ;
114+
115+ if emb_len > 0 && n_head > 0 {
116+ // For most transformers: head_dim = embedding_length / total_heads
117+ let total_heads = meta
118+ . get ( & n_head_key)
119+ . and_then ( |s| s. parse :: < u64 > ( ) . ok ( ) )
120+ . unwrap_or ( n_head) ;
121+
122+ let head_dim = emb_len / total_heads;
123+ key_len = head_dim;
124+ val_len = head_dim;
125+
126+ log:: info!(
127+ "Calculated key_len and val_len from embedding_length: {} / {} heads = {} per head" ,
128+ emb_len,
129+ total_heads,
130+ head_dim
131+ ) ;
132+ }
133+ }
134+
106135 if key_len == 0 || val_len == 0 {
107136 return Err ( KVCacheError :: EmbeddingLengthInvalid ) ;
108137 }
0 commit comments