diff --git a/applications/tests/Evaluation.Tests/appsettings.json b/applications/tests/Evaluation.Tests/appsettings.json index 3b0e8c07f..6b9c02d2d 100644 --- a/applications/tests/Evaluation.Tests/appsettings.json +++ b/applications/tests/Evaluation.Tests/appsettings.json @@ -96,13 +96,22 @@ "MaxRetries": 10 }, "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } } }, "DataIngestion": { diff --git a/examples/001-dotnet-WebClient/file9-settings.json b/examples/001-dotnet-WebClient/file9-settings.json index e98ef739e..03ae0f9ed 100644 --- a/examples/001-dotnet-WebClient/file9-settings.json +++ b/examples/001-dotnet-WebClient/file9-settings.json @@ -65,13 +65,22 @@ "MaxRetries": 10 }, "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. 
"nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } }, "AzureAIDocIntel": { // "APIKey" or "AzureIdentity". diff --git a/examples/002-dotnet-Serverless/appsettings.json b/examples/002-dotnet-Serverless/appsettings.json index 2a0b16831..8e060a1df 100644 --- a/examples/002-dotnet-Serverless/appsettings.json +++ b/examples/002-dotnet-Serverless/appsettings.json @@ -120,13 +120,22 @@ // "Endpoint": "https://s3.amazonaws.com" }, "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } } }, "DataIngestion": { diff --git a/examples/002-dotnet-Serverless/file9-settings.json b/examples/002-dotnet-Serverless/file9-settings.json index e98ef739e..03ae0f9ed 100644 --- a/examples/002-dotnet-Serverless/file9-settings.json +++ b/examples/002-dotnet-Serverless/file9-settings.json @@ -65,13 +65,22 @@ "MaxRetries": 10 }, "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. 
"llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } }, "AzureAIDocIntel": { // "APIKey" or "AzureIdentity". diff --git a/examples/105-dotnet-serverless-llamasharp/appsettings.json b/examples/105-dotnet-serverless-llamasharp/appsettings.json index d8ebcecb7..da78f1801 100644 --- a/examples/105-dotnet-serverless-llamasharp/appsettings.json +++ b/examples/105-dotnet-serverless-llamasharp/appsettings.json @@ -15,13 +15,22 @@ "KernelMemory": { "Services": { "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } }, "AzureOpenAIEmbedding": { // "ApiKey" or "AzureIdentity" diff --git a/examples/210-KM-without-builder/appsettings.json b/examples/210-KM-without-builder/appsettings.json index 5fa67ec65..83857dd2c 100644 --- a/examples/210-KM-without-builder/appsettings.json +++ b/examples/210-KM-without-builder/appsettings.json @@ -351,13 +351,22 @@ "Replicas": 0 }, "LlamaSharp": { - // path to file, e.g. 
"llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } }, "MongoDbAtlas": { "ConnectionString": "mongodb://root:root@localhost:27777/?authSource=admin", diff --git a/examples/401-evaluation/appsettings.json b/examples/401-evaluation/appsettings.json index 694fd3115..1bb5005d3 100644 --- a/examples/401-evaluation/appsettings.json +++ b/examples/401-evaluation/appsettings.json @@ -103,13 +103,22 @@ "MaxEmbeddingBatchSize": 100 }, "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. 
"nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } } }, "DataIngestion": { diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextEmbeddingGeneratorTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextEmbeddingGeneratorTest.cs new file mode 100644 index 000000000..922b4b498 --- /dev/null +++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextEmbeddingGeneratorTest.cs @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.KernelMemory; +using Microsoft.KernelMemory.AI.LlamaSharp; +using Microsoft.KM.TestHelpers; +using Xunit.Abstractions; + +namespace Microsoft.LlamaSharp.FunctionalTests; + +public sealed class LlamaSharpTextEmbeddingGeneratorTest : BaseFunctionalTestCase +{ + private readonly LlamaSharpTextEmbeddingGenerator _target; + + public LlamaSharpTextEmbeddingGeneratorTest( + IConfiguration cfg, + ITestOutputHelper output) : base(cfg, output) + { + this.LlamaSharpConfig.Validate(); + this._target = new LlamaSharpTextEmbeddingGenerator(this.LlamaSharpConfig.EmbeddingModel, loggerFactory: null); + var modelFilename = this.LlamaSharpConfig.EmbeddingModel.ModelPath.Split('/').Last().Split('\\').Last(); + Console.WriteLine($"Model in use: {modelFilename}"); + } + + [Fact] + [Trait("Category", "LlamaSharp")] + public async Task ItGeneratesEmbeddingVectors() + { + // Act + Embedding embedding = await this._target.GenerateEmbeddingAsync("some text"); + + // Assert + Console.WriteLine("Embedding size: " + embedding.Length); + + // Expected result using nomic-embed-text-v1.5.Q8_0.gguf + Assert.Equal(768, embedding.Length); + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + this._target.Dispose(); + } + + base.Dispose(disposing); + } +} diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs 
b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs index cfb575aa5..50016dd0a 100644 --- a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs +++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs @@ -21,8 +21,8 @@ public LlamaSharpTextGeneratorTest( this._timer = new Stopwatch(); this.LlamaSharpConfig.Validate(); - this._target = new LlamaSharpTextGenerator(this.LlamaSharpConfig, loggerFactory: null); - var modelFilename = this.LlamaSharpConfig.ModelPath.Split('/').Last().Split('\\').Last(); + this._target = new LlamaSharpTextGenerator(this.LlamaSharpConfig.TextModel, loggerFactory: null); + var modelFilename = this.LlamaSharpConfig.TextModel.ModelPath.Split('/').Last().Split('\\').Last(); Console.WriteLine($"Model in use: {modelFilename}"); } diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs index a78ee0864..50e1c2276 100644 --- a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs +++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs @@ -18,13 +18,13 @@ public LlamaSharpTokenizerTest( { this.LlamaSharpConfig.Validate(); - var modelFilename = this.LlamaSharpConfig.ModelPath.Split('/').Last().Split('\\').Last(); + var modelFilename = this.LlamaSharpConfig.TextModel.ModelPath.Split('/').Last().Split('\\').Last(); Console.WriteLine($"Model in use: {modelFilename}"); - var parameters = new ModelParams(this.LlamaSharpConfig.ModelPath) + var parameters = new ModelParams(this.LlamaSharpConfig.TextModel.ModelPath) { - ContextSize = this.LlamaSharpConfig.MaxTokenTotal, - GpuLayerCount = this.LlamaSharpConfig.GpuLayerCount ?? 20, + ContextSize = this.LlamaSharpConfig.TextModel.MaxTokenTotal, + GpuLayerCount = this.LlamaSharpConfig.TextModel.GpuLayerCount ?? 
20, }; LLamaWeights model = LLamaWeights.LoadFromFile(parameters); diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json index f4653ee2d..2c2a73001 100644 --- a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json +++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json @@ -6,13 +6,22 @@ }, "Services": { "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } }, "SimpleVectorDb": { // Options: "Disk" or "Volatile". Volatile data is lost after each execution. diff --git a/extensions/LlamaSharp/LlamaSharp/DependencyInjection.cs b/extensions/LlamaSharp/LlamaSharp/DependencyInjection.cs index 62ebe96bb..5b95ebceb 100644 --- a/extensions/LlamaSharp/LlamaSharp/DependencyInjection.cs +++ b/extensions/LlamaSharp/LlamaSharp/DependencyInjection.cs @@ -20,23 +20,67 @@ public static IKernelMemoryBuilder WithLlamaTextGeneration( uint maxTokenTotal, ITextTokenizer? 
textTokenizer = null) { - var config = new LlamaSharpConfig + var config = new LlamaSharpModelConfig { ModelPath = modelPath, MaxTokenTotal = maxTokenTotal }; - builder.Services.AddLlamaTextGeneration(config, textTokenizer); + builder.Services.AddLlamaSharpTextGeneration(config, textTokenizer); + + return builder; + } + + public static IKernelMemoryBuilder WithLlamaTextEmbeddingGeneration( + this IKernelMemoryBuilder builder, + string modelPath, + uint maxTokenTotal, + ITextTokenizer? textTokenizer = null) + { + var config = new LlamaSharpModelConfig + { + ModelPath = modelPath, + MaxTokenTotal = maxTokenTotal + }; + + builder.Services.AddLlamaSharpTextEmbeddingGeneration(config, textTokenizer); return builder; } public static IKernelMemoryBuilder WithLlamaTextGeneration( + this IKernelMemoryBuilder builder, + LlamaSharpModelConfig config, + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddLlamaSharpTextGeneration(config, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithLlamaTextEmbeddingGeneration( + this IKernelMemoryBuilder builder, + LlamaSharpModelConfig config, + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddLlamaSharpTextEmbeddingGeneration(config, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithLlamaTextGeneration( + this IKernelMemoryBuilder builder, + LlamaSharpConfig config, + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddLlamaSharpTextGeneration(config.TextModel, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithLlamaTextEmbeddingGeneration( this IKernelMemoryBuilder builder, LlamaSharpConfig config, ITextTokenizer? 
textTokenizer = null) { - builder.Services.AddLlamaTextGeneration(config, textTokenizer); + builder.Services.AddLlamaSharpTextEmbeddingGeneration(config.EmbeddingModel, textTokenizer); return builder; } } @@ -46,9 +90,9 @@ public static IKernelMemoryBuilder WithLlamaTextGeneration( /// public static partial class DependencyInjection { - public static IServiceCollection AddLlamaTextGeneration( + public static IServiceCollection AddLlamaSharpTextGeneration( this IServiceCollection services, - LlamaSharpConfig config, + LlamaSharpModelConfig config, ITextTokenizer? textTokenizer = null) { config.Validate(); @@ -58,4 +102,17 @@ public static IServiceCollection AddLlamaTextGeneration( textTokenizer: textTokenizer, loggerFactory: serviceProvider.GetService())); } + + public static IServiceCollection AddLlamaSharpTextEmbeddingGeneration( + this IServiceCollection services, + LlamaSharpModelConfig config, + ITextTokenizer? textTokenizer = null) + { + config.Validate(); + return services + .AddSingleton(serviceProvider => new LlamaSharpTextEmbeddingGenerator( + config: config, + textTokenizer: textTokenizer, + loggerFactory: serviceProvider.GetService())); + } } diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpConfig.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpConfig.cs index c87b7b290..a88a68032 100644 --- a/extensions/LlamaSharp/LlamaSharp/LlamaSharpConfig.cs +++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpConfig.cs @@ -1,7 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. -using System.IO; - #pragma warning disable IDE0130 // reduce number of "using" statements // ReSharper disable once CheckNamespace - reduce number of "using" statements namespace Microsoft.KernelMemory; @@ -9,33 +7,21 @@ namespace Microsoft.KernelMemory; public class LlamaSharpConfig { /// - /// Path to the *.gguf file. + /// Settings for the model used for text generation. Chat models can be used too. 
/// - public string ModelPath { get; set; } = ""; + public LlamaSharpModelConfig TextModel { get; set; } = new(); /// - /// Max number of tokens supported by the model. + /// Settings for the model used for text embedding generation. /// - public uint MaxTokenTotal { get; set; } = 4096; - - /// - /// Optional, number of GPU layers - /// - public int? GpuLayerCount { get; set; } + public LlamaSharpModelConfig EmbeddingModel { get; set; } = new(); /// /// Verify that the current state is valid. /// public void Validate(bool allowIO = true) { - if (string.IsNullOrWhiteSpace(this.ModelPath)) - { - throw new ConfigurationException($"LlamaSharp: {nameof(this.ModelPath)} is empty"); - } - - if (allowIO && !File.Exists(this.ModelPath)) - { - throw new ConfigurationException($"LlamaSharp: {nameof(this.ModelPath)} file not found"); - } + this.TextModel.Validate(allowIO); + this.EmbeddingModel.Validate(allowIO); } } diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpModelConfig.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpModelConfig.cs new file mode 100644 index 000000000..c331a4c36 --- /dev/null +++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpModelConfig.cs @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.IO; + +#pragma warning disable IDE0130 // reduce number of "using" statements +// ReSharper disable once CheckNamespace - reduce number of "using" statements +namespace Microsoft.KernelMemory; + +public class LlamaSharpModelConfig +{ + /// + /// Path to the *.gguf file. + /// + public string ModelPath { get; set; } = ""; + + /// + /// Max number of tokens supported by the model. + /// + public uint MaxTokenTotal { get; set; } = 4096; + + /// + /// Optional, number of GPU layers + /// + public int? GpuLayerCount { get; set; } + + /// + /// Verify that the current state is valid. 
+ /// + public void Validate(bool allowIO = true) + { + if (string.IsNullOrWhiteSpace(this.ModelPath)) + { + throw new ConfigurationException($"LlamaSharp: {nameof(this.ModelPath)} is empty"); + } + + if (allowIO && !File.Exists(this.ModelPath)) + { + throw new ConfigurationException($"LlamaSharp: {nameof(this.ModelPath)} file not found"); + } + } +} diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextEmbeddingGenerator.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextEmbeddingGenerator.cs new file mode 100644 index 000000000..2478e1565 --- /dev/null +++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextEmbeddingGenerator.cs @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using LLama; +using LLama.Common; +using LLama.Native; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.Diagnostics; + +namespace Microsoft.KernelMemory.AI.LlamaSharp; + +public sealed class LlamaSharpTextEmbeddingGenerator : ITextEmbeddingGenerator, IDisposable +{ + private readonly LLamaEmbedder _embedder; + private readonly LLamaWeights _model; + private readonly LLamaContext _context; + private readonly ITextTokenizer _textTokenizer; + private readonly ILogger _log; + + /// + /// Create new instance + /// + /// Configuration settings + /// Optional text tokenizer, replacing the one provided by the model + /// Application logger instance + public LlamaSharpTextEmbeddingGenerator( + LlamaSharpModelConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + { + this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger(); + + config.Validate(); + this.MaxTokens = (int)config.MaxTokenTotal; + + var parameters = new ModelParams(config.ModelPath) + { + ContextSize = config.MaxTokenTotal, + GpuLayerCount = config.GpuLayerCount ?? 
20, + Embeddings = true, + PoolingType = LLamaPoolingType.None, + }; + + var modelFilename = config.ModelPath.Split('/').Last().Split('\\').Last(); + this._log.LogDebug("Loading LLama model: {1}", modelFilename); + + this._model = LLamaWeights.LoadFromFile(parameters); + this._context = this._model.CreateContext(parameters); + this._log.LogDebug("LLama model loaded"); + + this._embedder = new LLamaEmbedder(this._model, parameters); + this._textTokenizer = textTokenizer ?? new LlamaSharpTokenizer(this._context); + } + + /// + public int MaxTokens { get; } + + /// + public int CountTokens(string text) + { + return this._textTokenizer.CountTokens(text); + } + + /// + public IReadOnlyList GetTokens(string text) + { + return this._textTokenizer.GetTokens(text); + } + + /// + public async Task GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default) + { + if (this._log.IsEnabled(LogLevel.Trace)) + { + this._log.LogTrace("Generating embedding, input token size: {0}, text length: {1}", this._textTokenizer.CountTokens(text), text.Length); + } + + IReadOnlyList embeddings = await this._embedder.GetEmbeddings(text, cancellationToken).ConfigureAwait(false); + return new Embedding(embeddings[0]); + } + + /// + public void Dispose() + { + this._embedder.Dispose(); + this._context.Dispose(); + this._model.Dispose(); + } +} diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs index 67c0e1ae4..a0026b3c4 100644 --- a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs +++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs @@ -34,7 +34,7 @@ public sealed class LlamaSharpTextGenerator : ITextGenerator, IDisposable /// Optional text tokenizer, replacing the one provided by the model /// Application logger instance public LlamaSharpTextGenerator( - LlamaSharpConfig config, + LlamaSharpModelConfig config, ITextTokenizer? textTokenizer = null, ILoggerFactory? 
loggerFactory = null) { diff --git a/service/Service/ServiceConfiguration.cs b/service/Service/ServiceConfiguration.cs index 69db00e82..65c638c6a 100644 --- a/service/Service/ServiceConfiguration.cs +++ b/service/Service/ServiceConfiguration.cs @@ -244,6 +244,15 @@ private void ConfigureIngestionEmbeddingGenerators(IKernelMemoryBuilder builder) break; } + case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase): + { + var instance = this.GetServiceInstance(builder, + s => s.AddLlamaSharpTextEmbeddingGeneration( + config: this.GetServiceConfig("LlamaSharp").EmbeddingModel)); + builder.AddIngestionEmbeddingGenerator(instance); + break; + } + default: // NOOP - allow custom implementations, via WithCustomEmbeddingGeneration() break; @@ -395,6 +404,11 @@ private void ConfigureRetrievalEmbeddingGenerator(IKernelMemoryBuilder builder) textTokenizer: new GPT4oTokenizer()); break; + case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase): + builder.Services.AddLlamaSharpTextEmbeddingGeneration( + config: this.GetServiceConfig("LlamaSharp").EmbeddingModel); + break; + default: // NOOP - allow custom implementations, via WithCustomEmbeddingGeneration() break; @@ -479,7 +493,8 @@ private void ConfigureTextGenerator(IKernelMemoryBuilder builder) break; case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase): - builder.Services.AddLlamaTextGeneration(this.GetServiceConfig("LlamaSharp")); + builder.Services.AddLlamaSharpTextGeneration( + config: this.GetServiceConfig("LlamaSharp").TextModel); break; default: diff --git a/service/Service/appsettings.json b/service/Service/appsettings.json index e19971b5c..d132ed4b6 100644 --- a/service/Service/appsettings.json +++ b/service/Service/appsettings.json @@ -383,13 +383,22 @@ "Replicas": 0 }, "LlamaSharp": { - // path to file, e.g. 
"llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } }, "MongoDbAtlas": { "ConnectionString": "mongodb://root:root@localhost:27777/?authSource=admin", diff --git a/service/tests/Core.FunctionalTests/appsettings.json b/service/tests/Core.FunctionalTests/appsettings.json index bcfbb4103..2b0e90b31 100644 --- a/service/tests/Core.FunctionalTests/appsettings.json +++ b/service/tests/Core.FunctionalTests/appsettings.json @@ -15,13 +15,22 @@ "UseHybridSearch": false }, "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. 
"nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } }, "MongoDbAtlas": { "ConnectionString": "mongodb://localhost:27017/?directConnection=true&serverSelectionTimeoutMS=2000", diff --git a/service/tests/Service.FunctionalTests/appsettings.json b/service/tests/Service.FunctionalTests/appsettings.json index b0cfcd5af..0d823d268 100644 --- a/service/tests/Service.FunctionalTests/appsettings.json +++ b/service/tests/Service.FunctionalTests/appsettings.json @@ -42,13 +42,22 @@ "APIKey": "" }, "LlamaSharp": { - // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" - "ModelPath": "", - // Max number of tokens supported by the model - "MaxTokenTotal": 4096 - // Optional parameters - // "GpuLayerCount": 32, - // "Seed": 1337, + "TextModel": { + // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + }, + "EmbeddingModel": { + // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf" + "ModelPath": "", + // Max number of tokens supported by the model + "MaxTokenTotal": 4096 + // Optional parameters + // "GpuLayerCount": 32, + } } } },