microsoft · dluc · Oct 30, 2024 · Oct 30, 2024 · Oct 30, 2024 · Oct 30, 2024
diff --git a/applications/tests/Evaluation.Tests/appsettings.json b/applications/tests/Evaluation.Tests/appsettings.json
@@ -96,13 +96,22 @@
         "MaxRetries": 10
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       }
     },
     "DataIngestion": {

diff --git a/examples/001-dotnet-WebClient/file9-settings.json b/examples/001-dotnet-WebClient/file9-settings.json
@@ -65,13 +65,22 @@
         "MaxRetries": 10
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "AzureAIDocIntel": {
         // "APIKey" or "AzureIdentity".

diff --git a/examples/002-dotnet-Serverless/appsettings.json b/examples/002-dotnet-Serverless/appsettings.json
@@ -120,13 +120,22 @@
         // "Endpoint": "https://s3.amazonaws.com"
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       }
     },
     "DataIngestion": {

diff --git a/examples/002-dotnet-Serverless/file9-settings.json b/examples/002-dotnet-Serverless/file9-settings.json
@@ -65,13 +65,22 @@
         "MaxRetries": 10
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "AzureAIDocIntel": {
         // "APIKey" or "AzureIdentity".

diff --git a/examples/105-dotnet-serverless-llamasharp/appsettings.json b/examples/105-dotnet-serverless-llamasharp/appsettings.json
@@ -15,13 +15,22 @@
   "KernelMemory": {
     "Services": {
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "AzureOpenAIEmbedding": {
         // "ApiKey" or "AzureIdentity"

diff --git a/examples/210-KM-without-builder/appsettings.json b/examples/210-KM-without-builder/appsettings.json
@@ -351,13 +351,22 @@
         "Replicas": 0
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "MongoDbAtlas": {
         "ConnectionString": "mongodb://root:root@localhost:27777/?authSource=admin",

diff --git a/examples/401-evaluation/appsettings.json b/examples/401-evaluation/appsettings.json
@@ -103,13 +103,22 @@
         "MaxEmbeddingBatchSize": 100
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       }
     },
     "DataIngestion": {

diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextEmbeddingGeneratorTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextEmbeddingGeneratorTest.cs
@@ -0,0 +1,63 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using Microsoft.KernelMemory;
+using Microsoft.KernelMemory.AI.LlamaSharp;
+using Microsoft.KM.TestHelpers;
+using Xunit.Abstractions;
+
+namespace Microsoft.LlamaSharp.FunctionalTests;
+
+/// <summary>
+/// Functional tests for <see cref="LlamaSharpTextEmbeddingGenerator"/>, built from
+/// the <c>EmbeddingModel</c> section of the LlamaSharp configuration.
+/// </summary>
+public sealed class LlamaSharpTextEmbeddingGeneratorTest : BaseFunctionalTestCase
+{
+    private readonly LlamaSharpTextEmbeddingGenerator _target;
+
+    /// <summary>
+    /// Creates the generator under test from the embedding model settings
+    /// and prints the file name of the model being used.
+    /// </summary>
+    public LlamaSharpTextEmbeddingGeneratorTest(
+        IConfiguration cfg,
+        ITestOutputHelper output) : base(cfg, output)
+    {
+        this.LlamaSharpConfig.Validate();
+        this._target = new LlamaSharpTextEmbeddingGenerator(this.LlamaSharpConfig.EmbeddingModel, loggerFactory: null);
+        // Log the embedding model's file name (last path segment, either separator style),
+        // i.e. the model passed to _target above, not the text generation model.
+        var modelFilename = this.LlamaSharpConfig.EmbeddingModel.ModelPath.Split('/').Last().Split('\\').Last();
+        Console.WriteLine($"Model in use: {modelFilename}");
+    }
+
+    /// <summary>
+    /// Generates an embedding for a short text and checks the size of the resulting vector.
+    /// </summary>
+    [Fact]
+    [Trait("Category", "LlamaSharp")]
+    public async Task ItGeneratesEmbeddingVectors()
+    {
+        // Act
+        Embedding embedding = await this._target.GenerateEmbeddingAsync("some text");
+
+        // Assert
+        Console.WriteLine("Embedding size: " + embedding.Length);
+
+        // Expected result using nomic-embed-text-v1.5.Q8_0.gguf
+        Assert.Equal(768, embedding.Length);
+    }
+
+    /// <summary>
+    /// Disposes the generator under test when <paramref name="disposing"/> is true,
+    /// then delegates to the base implementation.
+    /// </summary>
+    protected override void Dispose(bool disposing)
+    {
+        if (disposing)
+        {
+            this._target.Dispose();
+        }
+
+        base.Dispose(disposing);
+    }
+}
diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs
@@ -21,8 +21,8 @@ public LlamaSharpTextGeneratorTest(
         this._timer = new Stopwatch();
 
         this.LlamaSharpConfig.Validate();
-        this._target = new LlamaSharpTextGenerator(this.LlamaSharpConfig, loggerFactory: null);
-        var modelFilename = this.LlamaSharpConfig.ModelPath.Split('/').Last().Split('\\').Last();
+        this._target = new LlamaSharpTextGenerator(this.LlamaSharpConfig.TextModel, loggerFactory: null);
+        var modelFilename = this.LlamaSharpConfig.TextModel.ModelPath.Split('/').Last().Split('\\').Last();
         Console.WriteLine($"Model in use: {modelFilename}");
     }
 

diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs
@@ -18,13 +18,13 @@ public LlamaSharpTokenizerTest(
     {
         this.LlamaSharpConfig.Validate();
 
-        var modelFilename = this.LlamaSharpConfig.ModelPath.Split('/').Last().Split('\\').Last();
+        var modelFilename = this.LlamaSharpConfig.TextModel.ModelPath.Split('/').Last().Split('\\').Last();
         Console.WriteLine($"Model in use: {modelFilename}");
 
-        var parameters = new ModelParams(this.LlamaSharpConfig.ModelPath)
+        var parameters = new ModelParams(this.LlamaSharpConfig.TextModel.ModelPath)
         {
-            ContextSize = this.LlamaSharpConfig.MaxTokenTotal,
-            GpuLayerCount = this.LlamaSharpConfig.GpuLayerCount ?? 20,
+            ContextSize = this.LlamaSharpConfig.TextModel.MaxTokenTotal,
+            GpuLayerCount = this.LlamaSharpConfig.TextModel.GpuLayerCount ?? 20,
         };
 
         LLamaWeights model = LLamaWeights.LoadFromFile(parameters);

diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json
@@ -6,13 +6,22 @@
     },
     "Services": {
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "SimpleVectorDb": {
         // Options: "Disk" or "Volatile". Volatile data is lost after each execution.