Added update and defrag methods for KV cache in SafeLLamaContextHandle

martindevans · martindevans · commit b24581d74ce9 · 2024-04-13T02:39:07.000+01:00
diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
@@ -374,23 +374,6 @@ public static void llama_log_set(NativeLogConfig.LLamaLogCallback logCallback)
         [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
         public static extern LLamaPos llama_kv_cache_seq_pos_max(SafeLLamaContextHandle ctx, LLamaSeqId seq);
 
-        /// <summary>
-        /// Defragment the KV cache. This will be applied:
-        ///   - lazily on next llama_decode()
-        ///   - explicitly with llama_kv_cache_update()
-        /// </summary>
-        /// <param name="ctx"></param>
-        /// <returns></returns>
-        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
-        public static extern LLamaPos llama_kv_cache_defrag(SafeLLamaContextHandle ctx);
-
-        /// <summary>
-        /// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
-        /// </summary>
-        /// <param name="ctx"></param>
-        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
-        public static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx);
-
         /// <summary>
         /// Allocates a batch of tokens on the heap
         /// Each token can be assigned up to n_seq_max sequence ids
diff --git a/LLama/Native/SafeLLamaContextHandle.cs b/LLama/Native/SafeLLamaContextHandle.cs
@@ -264,6 +264,23 @@ static SafeLLamaContextHandle()
         /// </returns>
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         private static extern unsafe nuint llama_state_seq_set_data(SafeLLamaContextHandle ctx, byte* src, LLamaSeqId dest_seq_id);
+
+        /// <summary>
+        /// Defragment the KV cache. This will be applied:
+        ///   - lazily on next llama_decode()
+        ///   - explicitly with llama_kv_cache_update()
+        /// </summary>
+        /// <param name="ctx">Context handle whose KV cache should be defragmented</param>
+        /// <remarks>Declared void: nothing is reported back to the caller.</remarks>
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        private static extern void llama_kv_cache_defrag(SafeLLamaContextHandle ctx);
+
+        /// <summary>Apply the KV cache updates (such as K-shifts, defragmentation, etc.)</summary>
+        /// <param name="ctx">Context handle whose pending KV cache updates are applied</param>
+        // NOTE(review): the native imports visible next to this one are private; confirm this one is
+        // meant to be public rather than reached only through the KvCacheUpdate() wrapper.
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx);
         #endregion
 
         /// <summary>
@@ -487,6 +504,25 @@ public void SetThreads(uint threads, uint threadsBatch)
         }
 
         #region KV Cache Management
+        /// <summary>
+        /// Apply pending KV cache updates (such as K-shifts, defragmentation, etc.)
+        /// </summary>
+        /// <remarks>
+        /// Passes this handle straight to the native llama_kv_cache_update.
+        /// </remarks>
+        public void KvCacheUpdate() => llama_kv_cache_update(this);
+
+        /// <summary>
+        /// Request defragmentation of the KV cache. It will be applied:
+        ///   - lazily, on the next llama_decode()
+        ///   - explicitly, by calling <see cref="KvCacheUpdate"/>
+        /// </summary>
+        /// <remarks>
+        /// Passes this handle straight to the native llama_kv_cache_defrag;
+        /// nothing is returned.
+        /// </remarks>
+        public void KvCacheDefrag() => llama_kv_cache_defrag(this);
+
         /// <summary>
         /// Get a new KV cache view that can be used to debug the KV cache
         /// </summary>