From 0763ca3ec19c8fef32d01d04bbcf8fce1d6226ef Mon Sep 17 00:00:00 2001
From: Martin Evans <martindevans@gmail.com>
Date: Fri, 23 Aug 2024 02:52:58 +0100
Subject: [PATCH 1/2] - Updated binaries to llama.cpp
 https://github.com/ggerganov/llama.cpp/blob/11b84eb4578864827afcf956db5b571003f18180
  - Built with https://github.com/SciSharp/LLamaSharp/actions/runs/10517369387

---
 LLama/Batched/LLamaContextExtensions.cs  | 25 ++++----
 LLama/LLamaContext.cs                    | 73 +++++++++++-------------
 LLama/LLamaSharp.csproj                  |  2 +-
 LLama/Native/LLamaModelQuantizeParams.cs |  2 +-
 LLama/Native/LLamaRopeType.cs            |  9 ++-
 LLama/Native/LLamaVocabPreType.cs        |  3 +
 LLama/Native/NativeApi.LLava.cs          |  2 +-
 LLama/Native/NativeApi.cs                | 14 ++---
 LLama/Native/SafeLLamaContextHandle.cs   | 46 ++++++++-------
 LLama/Native/SafeLlamaModelHandle.cs     | 28 +++++++++
 llama.cpp                                |  2 +-
 11 files changed, 117 insertions(+), 89 deletions(-)
diff --git a/LLama/Batched/LLamaContextExtensions.cs b/LLama/Batched/LLamaContextExtensions.cs
index 9355301a5..db25d499e 100644
--- a/LLama/Batched/LLamaContextExtensions.cs
+++ b/LLama/Batched/LLamaContextExtensions.cs
@@ -1,5 +1,6 @@
-﻿using System;
+using System;
 using System.Buffers.Binary;
+using System.Diagnostics;
 using System.IO;
 using System.IO.MemoryMappedFiles;
 using LLama.Native;
@@ -24,20 +25,20 @@ internal static void SaveState(this LLamaContext context, string filename, LLama
         if (File.Exists(filename))
             File.Delete(filename);
 
-        // Estimate size of state to write to disk, this is always equal to or greater than the actual size
-        var estimatedStateSize = checked((long)context.NativeHandle.GetStateSize(sequence));
+        // Get the exact size of the state
+        var stateSize = context.NativeHandle.GetStateSize(sequence);
 
         // Space for "extra" byte plus a 8 byte header
         var prefixSize = header.Length + 8;
 
         // Add enough space for the "extra" data and a 6 byte header
-        var totalFileSize = prefixSize + estimatedStateSize;
+        var totalFileSize = (nuint)prefixSize + stateSize;
 
         // Map the file and write the bytes directly to it.
-        long writtenBytes = 0;
-        using (var file = MemoryMappedFile.CreateFromFile(filename, FileMode.Create, null, totalFileSize))
+        nuint writtenBytes = 0;
+        using (var file = MemoryMappedFile.CreateFromFile(filename, FileMode.Create, null, (long)totalFileSize))
         {
-            using (var view = file.CreateViewAccessor(0, totalFileSize))
+            using (var view = file.CreateViewAccessor(0, (long)totalFileSize))
             {
                 unsafe
                 {
@@ -51,10 +52,10 @@ internal static void SaveState(this LLamaContext context, string filename, LLama
                         BinaryPrimitives.WriteUInt32BigEndian(new Span<byte>(ptr + writtenBytes, 4), (uint)header.Length);
                         writtenBytes += 4;
                         header.CopyTo(new Span<byte>(ptr + writtenBytes, header.Length));
-                        writtenBytes += header.Length;
+                        writtenBytes += (nuint)header.Length;
 
                         // Write state data
-                        writtenBytes += (long)context.NativeHandle.GetState(ptr + writtenBytes, (ulong)estimatedStateSize, sequence);
+                        writtenBytes += context.NativeHandle.GetState(ptr + writtenBytes, stateSize, sequence);
                     }
                     finally
                     {
@@ -64,9 +65,7 @@ internal static void SaveState(this LLamaContext context, string filename, LLama
             }
         }
 
-        // Truncate the file to the actual size of data that was written
-        using (var fileStream = new FileStream(filename, FileMode.Open))
-            fileStream.SetLength(writtenBytes);
+        Debug.Assert(totalFileSize == writtenBytes, $"Expected to write {totalFileSize} bytes, but actally wrote {writtenBytes}");
     }
 
     /// <summary>
@@ -105,7 +104,7 @@ internal static void LoadState(this LLamaContext context, string filename, LLama
                     new Span<byte>(ptr + readBytes, headerLength).CopyTo(header);
                     readBytes += headerLength;
 
-                    context.NativeHandle.SetState(ptr + readBytes, sequence);
+                    context.NativeHandle.SetState(ptr + readBytes, (nuint)((long)view.SafeMemoryMappedViewHandle.ByteLength - readBytes), sequence);
                 }
                 finally
                 {
diff --git a/LLama/LLamaContext.cs b/LLama/LLamaContext.cs
index 70341649b..36c6de7e5 100644
--- a/LLama/LLamaContext.cs
+++ b/LLama/LLamaContext.cs
@@ -1,7 +1,7 @@
-using LLama.Exceptions;
 using LLama.Native;
 using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Text;
 using System.IO;
 using System.IO.MemoryMappedFiles;
@@ -150,13 +150,13 @@ public void SaveState(string filename)
             if (File.Exists(filename))
                 File.Delete(filename);
 
-            // Estimate size of state to write to disk, this is always equal to or greater than the actual size
-            var estimatedStateSize = checked((long)NativeHandle.GetStateSize());
+            // Get the exact size of the state
+            var stateSize = NativeHandle.GetStateSize();
 
             // Map the file and write the bytes directly to it. This saves copying the bytes into a C# array
-            long writtenBytes;
-            using (var file = MemoryMappedFile.CreateFromFile(filename, FileMode.Create, null, estimatedStateSize))
-            using (var view = file.CreateViewAccessor(0, estimatedStateSize))
+            nuint writtenBytes;
+            using (var file = MemoryMappedFile.CreateFromFile(filename, FileMode.Create, null, checked((long)stateSize)))
+            using (var view = file.CreateViewAccessor(0, checked((long)stateSize)))
             {
                 unsafe
                 {
@@ -164,7 +164,7 @@ public void SaveState(string filename)
                     view.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
                     try
                     {
-                        writtenBytes = (long)NativeHandle.GetState(ptr, (ulong)estimatedStateSize);
+                        writtenBytes = NativeHandle.GetState(ptr, stateSize);
                     }
                     finally
                     {
@@ -173,9 +173,7 @@ public void SaveState(string filename)
                 }
             }
 
-            // Truncate the file to the actual size of data that was written
-            using (var fileStream = new FileStream(filename, FileMode.Open))
-                fileStream.SetLength(writtenBytes);
+            Debug.Assert(stateSize == writtenBytes, $"Expected to write {stateSize} bytes, but actally wrote {writtenBytes}");
         }
 
         /// <summary>
@@ -189,13 +187,13 @@ public void SaveState(string filename, LLamaSeqId sequence)
             if (File.Exists(filename))
                 File.Delete(filename);
 
-            // Estimate size of state to write to disk, this is always equal to or greater than the actual size
-            var estimatedStateSize = checked((long)NativeHandle.GetStateSize(sequence));
+            // Get the exact size of the state
+            var stateSize = NativeHandle.GetStateSize(sequence);
 
             // Map the file and write the bytes directly to it. This saves copying the bytes into a C# array
-            long writtenBytes;
-            using (var file = MemoryMappedFile.CreateFromFile(filename, FileMode.Create, null, estimatedStateSize))
-            using (var view = file.CreateViewAccessor(0, estimatedStateSize))
+            nuint writtenBytes;
+            using (var file = MemoryMappedFile.CreateFromFile(filename, FileMode.Create, null, checked((long)stateSize)))
+            using (var view = file.CreateViewAccessor(0, checked((long)stateSize)))
             {
                 unsafe
                 {
@@ -203,7 +201,7 @@ public void SaveState(string filename, LLamaSeqId sequence)
                     view.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
                     try
                     {
-                        writtenBytes = (long)NativeHandle.GetState(ptr, (ulong)estimatedStateSize, sequence);
+                        writtenBytes = NativeHandle.GetState(ptr, stateSize, sequence);
                     }
                     finally
                     {
@@ -212,9 +210,7 @@ public void SaveState(string filename, LLamaSeqId sequence)
                 }
             }
 
-            // Truncate the file to the actual size of data that was written
-            using (var fileStream = new FileStream(filename, FileMode.Open))
-                fileStream.SetLength(writtenBytes);
+            Debug.Assert(stateSize == writtenBytes, $"Expected to write {stateSize} bytes, but actally wrote {writtenBytes}");
         }
 
         /// <summary>
@@ -230,15 +226,14 @@ public State GetState()
             var memory = Marshal.AllocHGlobal((nint)stateSize);
             try
             {
-                // Copy the state data into memory, discover the actual size required
-                ulong actualSize;
+                // Copy the state data into memory
+                nuint actualSize;
                 unsafe
                 {
                     actualSize = NativeHandle.GetState((byte*)memory, stateSize);
                 }
 
-                // Shrink to size
-                memory = Marshal.ReAllocHGlobal(memory, (nint)actualSize);
+                Debug.Assert(actualSize == stateSize);
 
                 // Wrap memory in a "state"
                 var state = new State(memory, actualSize);
@@ -269,14 +264,13 @@ public SequenceState GetState(LLamaSeqId sequence)
             try
             {
                 // Copy the state data into memory, discover the actual size required
-                ulong actualSize;
+                nuint actualSize;
                 unsafe
                 {
                     actualSize = NativeHandle.GetState((byte*)memory, stateSize, sequence);
                 }
 
-                // Shrink to size
-                memory = Marshal.ReAllocHGlobal(memory, (nint)actualSize);
+                Debug.Assert(actualSize == stateSize);
 
                 // Wrap memory in a "state"
                 var state = new SequenceState(memory, actualSize);
@@ -309,7 +303,7 @@ public void LoadState(string filename)
                     view.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
                     try
                     {
-                        NativeHandle.SetState(ptr);
+                        NativeHandle.SetState(ptr, (nuint)view.SafeMemoryMappedViewHandle.ByteLength);
                     }
                     finally
                     {
@@ -336,7 +330,7 @@ public void LoadState(string filename, LLamaSeqId sequence)
                     view.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
                     try
                     {
-                        NativeHandle.SetState(ptr, sequence);
+                        NativeHandle.SetState(ptr, (nuint)view.SafeMemoryMappedViewHandle.ByteLength, sequence);
                     }
                     finally
                     {
@@ -354,7 +348,7 @@ public void LoadState(State state)
         {
             unsafe
             {
-                NativeHandle.SetState((byte*)state.DangerousGetHandle());
+                NativeHandle.SetState((byte*)state.DangerousGetHandle(), state.Size);
             }
         }
 
@@ -367,7 +361,7 @@ public void LoadState(SequenceState state, LLamaSeqId sequence)
         {
             unsafe
             {
-                NativeHandle.SetState((byte*)state.DangerousGetHandle(), sequence);
+                NativeHandle.SetState((byte*)state.DangerousGetHandle(), state.Size, sequence);
             }
         }
         #endregion
@@ -380,7 +374,8 @@ public void LoadState(SequenceState state, LLamaSeqId sequence)
         public bool ShouldAddBosToken()
         {
             var addBos = NativeApi.llama_add_bos_token(NativeHandle.ModelHandle);
-            return addBos != -1 ? Convert.ToBoolean(addBos) : NativeHandle.LLamaVocabType == LLamaVocabType.SentencePiece;
+            // The native call now returns a definite bool; the old -1 ("unknown") result and its SentencePiece fallback no longer apply.
+            return addBos;
         }
 
         #region eval overloads
@@ -458,13 +453,13 @@ public void Dispose()
         public class State
             : SafeLLamaHandleBase
         {
-            private readonly ulong _size;
+            private readonly nuint _size;
             /// <summary>
             /// Get the size in bytes of this state object
             /// </summary>
-            public ulong Size => _size;
+            public nuint Size => _size;
 
-            internal State(IntPtr memory, ulong size)
+            internal State(IntPtr memory, nuint size)
                 : base(memory, true)
             {
                 _size = size;
@@ -513,7 +508,7 @@ public void Save(Stream stream)
             public static async Task<State> LoadAsync(Stream stream)
             {
                 var memory = Marshal.AllocHGlobal((nint)stream.Length);
-                var state = new State(memory, checked((ulong)stream.Length));
+                var state = new State(memory, (nuint)stream.Length);
 
                 UnmanagedMemoryStream dest;
                 unsafe
@@ -533,7 +528,7 @@ public static async Task<State> LoadAsync(Stream stream)
             public static State Load(Stream stream)
             {
                 var memory = Marshal.AllocHGlobal((nint)stream.Length);
-                var state = new State(memory, checked((ulong)stream.Length));
+                var state = new State(memory, (nuint)stream.Length);
 
                 unsafe
                 {
@@ -551,13 +546,13 @@ public static State Load(Stream stream)
         public class SequenceState
             : SafeLLamaHandleBase
         {
-            private readonly ulong _size;
+            private readonly nuint _size;
             /// <summary>
             /// Get the size in bytes of this state object
             /// </summary>
-            public ulong Size => _size;
+            public nuint Size => _size;
 
-            internal SequenceState(IntPtr memory, ulong size)
+            internal SequenceState(IntPtr memory, nuint size)
                 : base(memory, true)
             {
                 _size = size;
diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj
index bccf2b3f6..addda27f2 100644
--- a/LLama/LLamaSharp.csproj
+++ b/LLama/LLamaSharp.csproj
@@ -53,7 +53,7 @@
   </ItemGroup>
 
   <PropertyGroup>
-    <BinaryReleaseId>345c8c0c87a97c1595f9c8b</BinaryReleaseId>
+    <BinaryReleaseId>11b84eb4578864827afcf</BinaryReleaseId>
   </PropertyGroup>
 
   <PropertyGroup>
diff --git a/LLama/Native/LLamaModelQuantizeParams.cs b/LLama/Native/LLamaModelQuantizeParams.cs
index 8979d8724..d11f4882e 100644
--- a/LLama/Native/LLamaModelQuantizeParams.cs
+++ b/LLama/Native/LLamaModelQuantizeParams.cs
@@ -25,7 +25,7 @@ public struct LLamaModelQuantizeParams
         public GGMLType output_tensor_type;
 
         /// <summary>
-        /// itoken embeddings tensor type
+        /// token embeddings tensor type
         /// </summary>
         public GGMLType token_embedding_type;
 
diff --git a/LLama/Native/LLamaRopeType.cs b/LLama/Native/LLamaRopeType.cs
index 19e50e6b9..ebad9e77b 100644
--- a/LLama/Native/LLamaRopeType.cs
+++ b/LLama/Native/LLamaRopeType.cs
@@ -1,9 +1,12 @@
-﻿namespace LLama.Native;
+namespace LLama.Native;
 
+/// <summary>
+/// The RoPE (rotary position embedding) variant, matching the native llama_rope_type enum
+/// </summary>
+/// <remarks>llama_rope_type</remarks>
 public enum LLamaRopeType
 {
     None = -1,
     Norm = 0,
-    NEOX = 2,
-    GLM = 4,
+    NEOX = 2,//GGML_ROPE_TYPE_NEOX,
 }
\ No newline at end of file
diff --git a/LLama/Native/LLamaVocabPreType.cs b/LLama/Native/LLamaVocabPreType.cs
index 3e5bc287c..35ed39c06 100644
--- a/LLama/Native/LLamaVocabPreType.cs
+++ b/LLama/Native/LLamaVocabPreType.cs
@@ -30,4 +30,7 @@ internal enum LLamaVocabPreType
     TEKKEN = 20,
     SMOLLM = 21,
     CODESHELL = 22,
+    BLOOM = 23,
+    GPT3_FINNISH = 24,
+    EXAONE = 25,
 }
\ No newline at end of file
diff --git a/LLama/Native/NativeApi.LLava.cs b/LLama/Native/NativeApi.LLava.cs
index 246a9d1b2..bda796d4c 100644
--- a/LLama/Native/NativeApi.LLava.cs
+++ b/LLama/Native/NativeApi.LLava.cs
@@ -2,7 +2,7 @@
 
 namespace LLama.Native;
 
-public static unsafe partial class NativeApi
+public static partial class NativeApi
 {
     /// <summary>
     /// Sanity check for clip &lt;-&gt; llava embed size match
diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
index f7b97bead..8d967e670 100644
--- a/LLama/Native/NativeApi.cs
+++ b/LLama/Native/NativeApi.cs
@@ -188,19 +188,13 @@ public static unsafe int llama_chat_apply_template(SafeLlamaModelHandle? model,
             static extern int internal_llama_chat_apply_template(IntPtr model, byte* tmpl, LLamaChatMessage* chat, nuint n_msg, [MarshalAs(UnmanagedType.U1)] bool add_ass, byte* buf, int length);
         }
 
-        /// <summary>
-        /// Returns -1 if unknown, 1 for true or 0 for false.
-        /// </summary>
-        /// <returns></returns>
         [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
-        public static extern int llama_add_bos_token(SafeLlamaModelHandle model);
+        [return: MarshalAs(UnmanagedType.U1)]
+        public static extern bool llama_add_bos_token(SafeLlamaModelHandle model);
 
-        /// <summary>
-        /// Returns -1 if unknown, 1 for true or 0 for false.
-        /// </summary>
-        /// <returns></returns>
         [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
-        public static extern int llama_add_eos_token(SafeLlamaModelHandle model);
+        [return: MarshalAs(UnmanagedType.U1)]
+        public static extern bool llama_add_eos_token(SafeLlamaModelHandle model);
 
         /// <summary>
         /// Print out timing information for this context
diff --git a/LLama/Native/SafeLLamaContextHandle.cs b/LLama/Native/SafeLLamaContextHandle.cs
index 628936352..b5932aa04 100644
--- a/LLama/Native/SafeLLamaContextHandle.cs
+++ b/LLama/Native/SafeLLamaContextHandle.cs
@@ -258,13 +258,13 @@ static SafeLLamaContextHandle()
         private static extern void llama_set_rng_seed(SafeLLamaContextHandle ctx, uint seed);
 
         /// <summary>
-        /// Returns the maximum size in bytes of the state (rng, logits, embedding
-        /// and kv_cache) - will often be smaller after compacting tokens
+        /// Returns the **actual** size in bytes of the state (rng, logits, embedding and kv_cache).
+        /// Only use when saving the state, not when restoring it, otherwise the size may be too small.
         /// </summary>
         /// <param name="ctx"></param>
         /// <returns></returns>
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-        private static extern ulong llama_state_get_size(SafeLLamaContextHandle ctx);
+        private static extern nuint llama_state_get_size(SafeLLamaContextHandle ctx);
 
         /// <summary>
         /// Copies the state to the specified destination address.
@@ -272,47 +272,51 @@ static SafeLLamaContextHandle()
         /// </summary>
         /// <param name="ctx"></param>
         /// <param name="dest"></param>
+        /// <param name="size">Number of bytes available to write to in dest</param>
         /// <returns>the number of bytes copied</returns>
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-        private static extern unsafe ulong llama_state_get_data(SafeLLamaContextHandle ctx, byte* dest);
+        private static extern unsafe nuint llama_state_get_data(SafeLLamaContextHandle ctx, byte* dest, nuint size);
 
         /// <summary>
         /// Set the state reading from the specified address
         /// </summary>
         /// <param name="ctx"></param>
         /// <param name="src"></param>
+        /// <param name="size">Number of bytes that can be safely read from src</param>
         /// <returns>the number of bytes read</returns>
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-        private static extern unsafe ulong llama_state_set_data(SafeLLamaContextHandle ctx, byte* src);
+        private static extern unsafe nuint llama_state_set_data(SafeLLamaContextHandle ctx, byte* src, nuint size);
 
         /// <summary>
         /// Get the exact size needed to copy the KV cache of a single sequence
         /// </summary>
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-        private static extern nuint llama_state_seq_get_size(SafeLLamaContextHandle ctx, LLamaSeqId seq_id);
+        private static extern nuint llama_state_seq_get_size(SafeLLamaContextHandle ctx, LLamaSeqId seqId);
 
         /// <summary>
         /// Copy the KV cache of a single sequence into the specified buffer
         /// </summary>
         /// <param name="ctx"></param>
         /// <param name="dst"></param>
-        /// <param name="seq_id"></param>
+        /// <param name="size">Number of bytes available to write to in dst</param>
+        /// <param name="seqId">The sequence whose KV cache is copied</param>
         /// <returns></returns>
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-        private static extern unsafe nuint llama_state_seq_get_data(SafeLLamaContextHandle ctx, byte* dst, LLamaSeqId seq_id);
+        private static extern unsafe nuint llama_state_seq_get_data(SafeLLamaContextHandle ctx, byte* dst, nuint size, LLamaSeqId seqId);
 
         /// <summary>
         /// Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence
         /// </summary>
         /// <param name="ctx"></param>
         /// <param name="src"></param>
-        /// <param name="dest_seq_id"></param>
+        /// <param name="size">Number of bytes that can be safely read from src</param>
+        /// <param name="destSeqId">The sequence to copy the data into</param>
         /// <returns>
         ///  - Positive: Ok
         ///  - Zero: Failed to load
         /// </returns>
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-        private static extern unsafe nuint llama_state_seq_set_data(SafeLLamaContextHandle ctx, byte* src, LLamaSeqId dest_seq_id);
+        private static extern unsafe nuint llama_state_seq_set_data(SafeLLamaContextHandle ctx, byte* src, nuint size, LLamaSeqId destSeqId);
 
         /// <summary>
         /// Defragment the KV cache. This will be applied:
@@ -569,7 +573,7 @@ public DecodeResult Decode(LLamaBatchEmbeddings batch)
         /// <summary>
         /// Get the size of the state, when saved as bytes
         /// </summary>
-        public ulong GetStateSize()
+        public nuint GetStateSize()
         {
             return llama_state_get_size(this);
         }
@@ -579,7 +583,7 @@ public ulong GetStateSize()
         /// </summary>
         /// <param name="sequence"></param>
         /// <returns></returns>
-        public ulong GetStateSize(LLamaSeqId sequence)
+        public nuint GetStateSize(LLamaSeqId sequence)
         {
             return llama_state_seq_get_size(this, sequence);
         }
@@ -591,13 +595,13 @@ public ulong GetStateSize(LLamaSeqId sequence)
         /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
         /// <returns>The number of bytes written to dest</returns>
         /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
-        public unsafe ulong GetState(byte* dest, ulong size)
+        public unsafe nuint GetState(byte* dest, nuint size)
         {
             var required = GetStateSize();
             if (size < required)
                 throw new ArgumentOutOfRangeException(nameof(size), $"Allocated space is too small, {size} < {required}");
 
-            return llama_state_get_data(this, dest);
+            return llama_state_get_data(this, dest, size);
         }
 
         /// <summary>
@@ -607,23 +611,24 @@ public unsafe ulong GetState(byte* dest, ulong size)
         /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
         /// <param name="sequence">The sequence to get state data for</param>
         /// <returns>The number of bytes written to dest</returns>
-        public unsafe ulong GetState(byte* dest, ulong size, LLamaSeqId sequence)
+        public unsafe nuint GetState(byte* dest, nuint size, LLamaSeqId sequence)
         {
             var required = GetStateSize(sequence);
             if (size < required)
                 throw new ArgumentOutOfRangeException(nameof(size), $"Allocated space is too small, {size} < {required}");
 
-            return llama_state_seq_get_data(this, dest, sequence);
+            return llama_state_seq_get_data(this, dest, size, sequence);
         }
 
         /// <summary>
         /// Set the raw state of this context
         /// </summary>
         /// <param name="src">The pointer to read the state from</param>
+        /// <param name="size">Number of bytes that can be safely read from the pointer</param>
         /// <returns>Number of bytes read from the src pointer</returns>
-        public unsafe ulong SetState(byte* src)
+        public unsafe nuint SetState(byte* src, nuint size)
         {
-            return llama_state_set_data(this, src);
+            return llama_state_set_data(this, src, size);
         }
 
         /// <summary>
@@ -631,10 +636,11 @@ public unsafe ulong SetState(byte* src)
         /// </summary>
         /// <param name="src">The pointer to read the state from</param>
         /// <param name="sequence">Sequence ID to set</param>
+        /// <param name="size">Number of bytes that can be safely read from the pointer</param>
         /// <returns>Number of bytes read from the src pointer</returns>
-        public unsafe ulong SetState(byte* src, LLamaSeqId sequence)
+        public unsafe nuint SetState(byte* src, nuint size, LLamaSeqId sequence)
         {
-            return llama_state_seq_set_data(this, src, sequence);
+            return llama_state_seq_set_data(this, src, size, sequence);
         }
         #endregion
 
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index eaf76421a..66f25bb36 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -65,6 +65,16 @@ public sealed class SafeLlamaModelHandle
         /// </summary>
         public bool HasEncoder => llama_model_has_encoder(this);
 
+        /// <summary>
+        /// Returns true if the model contains a decoder that requires llama_decode() call
+        /// </summary>
+        public bool HasDecoder => llama_model_has_decoder(this);
+
+        /// <summary>
+        /// Returns true if the model is recurrent (like Mamba, RWKV, etc.)
+        /// </summary>
+        public bool IsRecurrent => llama_model_is_recurrent(this);
+
         /// <summary>
         /// Get a description of this model
         /// </summary>
@@ -434,8 +444,26 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string k
         [return: MarshalAs(UnmanagedType.U1)]
         private static extern bool llama_model_has_encoder(SafeLlamaModelHandle model);
 
+        /// <summary>
+        /// Returns true if the model contains a decoder that requires llama_decode() call
+        /// </summary>
+        /// <param name="model"></param>
+        /// <returns></returns>
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        [return: MarshalAs(UnmanagedType.U1)]
+        private static extern bool llama_model_has_decoder(SafeLlamaModelHandle model);
+
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         private static extern IntPtr llama_lora_adapter_init(SafeLlamaModelHandle model, string path);
+
+        /// <summary>
+        /// Returns true if the model is recurrent (like Mamba, RWKV, etc.)
+        /// </summary>
+        /// <param name="model"></param>
+        /// <returns></returns>
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        [return: MarshalAs(UnmanagedType.U1)]
+        private static extern bool llama_model_is_recurrent(SafeLlamaModelHandle model);
         #endregion
 
         #region LoRA
diff --git a/llama.cpp b/llama.cpp
index 345c8c0c8..11b84eb45 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 345c8c0c87a97c1595f9c8b14833d531c8c7d8df
+Subproject commit 11b84eb4578864827afcf956db5b571003f18180

From 39457059bcadac0fe0c64012fe4d0f54394fbe69 Mon Sep 17 00:00:00 2001
From: Martin Evans <martindevans@gmail.com>
Date: Fri, 23 Aug 2024 02:55:59 +0100
Subject: [PATCH 2/2] Spelling

---
 LLama/Batched/LLamaContextExtensions.cs | 2 +-
 LLama/LLamaContext.cs                   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/LLama/Batched/LLamaContextExtensions.cs b/LLama/Batched/LLamaContextExtensions.cs
index db25d499e..71fa24ea2 100644
--- a/LLama/Batched/LLamaContextExtensions.cs
+++ b/LLama/Batched/LLamaContextExtensions.cs
@@ -65,7 +65,7 @@ internal static void SaveState(this LLamaContext context, string filename, LLama
             }
         }
 
-        Debug.Assert(totalFileSize == writtenBytes, $"Expected to write {totalFileSize} bytes, but actally wrote {writtenBytes}");
+        Debug.Assert(totalFileSize == writtenBytes, $"Expected to write {totalFileSize} bytes, but actually wrote {writtenBytes}");
     }
 
     /// <summary>
diff --git a/LLama/LLamaContext.cs b/LLama/LLamaContext.cs
index 36c6de7e5..ca38d49e4 100644
--- a/LLama/LLamaContext.cs
+++ b/LLama/LLamaContext.cs
@@ -173,7 +173,7 @@ public void SaveState(string filename)
                 }
             }
 
-            Debug.Assert(stateSize == writtenBytes, $"Expected to write {stateSize} bytes, but actally wrote {writtenBytes}");
+            Debug.Assert(stateSize == writtenBytes, $"Expected to write {stateSize} bytes, but actually wrote {writtenBytes}");
         }
 
         /// <summary>
@@ -210,7 +210,7 @@ public void SaveState(string filename, LLamaSeqId sequence)
                 }
             }
 
-            Debug.Assert(stateSize == writtenBytes, $"Expected to write {stateSize} bytes, but actally wrote {writtenBytes}");
+            Debug.Assert(stateSize == writtenBytes, $"Expected to write {stateSize} bytes, but actually wrote {writtenBytes}");
         }
 
         /// <summary>