diff --git a/applications/tests/Evaluation.Tests/appsettings.json b/applications/tests/Evaluation.Tests/appsettings.json
index 3b0e8c07f..6b9c02d2d 100644
--- a/applications/tests/Evaluation.Tests/appsettings.json
+++ b/applications/tests/Evaluation.Tests/appsettings.json
@@ -96,13 +96,22 @@
         "MaxRetries": 10
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       }
     },
     "DataIngestion": {
diff --git a/examples/001-dotnet-WebClient/file9-settings.json b/examples/001-dotnet-WebClient/file9-settings.json
index e98ef739e..03ae0f9ed 100644
--- a/examples/001-dotnet-WebClient/file9-settings.json
+++ b/examples/001-dotnet-WebClient/file9-settings.json
@@ -65,13 +65,22 @@
         "MaxRetries": 10
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "AzureAIDocIntel": {
         // "APIKey" or "AzureIdentity".
diff --git a/examples/002-dotnet-Serverless/appsettings.json b/examples/002-dotnet-Serverless/appsettings.json
index 2a0b16831..8e060a1df 100644
--- a/examples/002-dotnet-Serverless/appsettings.json
+++ b/examples/002-dotnet-Serverless/appsettings.json
@@ -120,13 +120,22 @@
         // "Endpoint": "https://s3.amazonaws.com"
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       }
     },
     "DataIngestion": {
diff --git a/examples/002-dotnet-Serverless/file9-settings.json b/examples/002-dotnet-Serverless/file9-settings.json
index e98ef739e..03ae0f9ed 100644
--- a/examples/002-dotnet-Serverless/file9-settings.json
+++ b/examples/002-dotnet-Serverless/file9-settings.json
@@ -65,13 +65,22 @@
         "MaxRetries": 10
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "AzureAIDocIntel": {
         // "APIKey" or "AzureIdentity".
diff --git a/examples/105-dotnet-serverless-llamasharp/appsettings.json b/examples/105-dotnet-serverless-llamasharp/appsettings.json
index d8ebcecb7..da78f1801 100644
--- a/examples/105-dotnet-serverless-llamasharp/appsettings.json
+++ b/examples/105-dotnet-serverless-llamasharp/appsettings.json
@@ -15,13 +15,22 @@
   "KernelMemory": {
     "Services": {
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "AzureOpenAIEmbedding": {
         // "ApiKey" or "AzureIdentity"
diff --git a/examples/210-KM-without-builder/appsettings.json b/examples/210-KM-without-builder/appsettings.json
index 5fa67ec65..83857dd2c 100644
--- a/examples/210-KM-without-builder/appsettings.json
+++ b/examples/210-KM-without-builder/appsettings.json
@@ -351,13 +351,22 @@
         "Replicas": 0
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "MongoDbAtlas": {
         "ConnectionString": "mongodb://root:root@localhost:27777/?authSource=admin",
diff --git a/examples/401-evaluation/appsettings.json b/examples/401-evaluation/appsettings.json
index 694fd3115..1bb5005d3 100644
--- a/examples/401-evaluation/appsettings.json
+++ b/examples/401-evaluation/appsettings.json
@@ -103,13 +103,22 @@
         "MaxEmbeddingBatchSize": 100
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       }
     },
     "DataIngestion": {
diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextEmbeddingGeneratorTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextEmbeddingGeneratorTest.cs
new file mode 100644
index 000000000..922b4b498
--- /dev/null
+++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextEmbeddingGeneratorTest.cs
@@ -0,0 +1,59 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using Microsoft.KernelMemory;
+using Microsoft.KernelMemory.AI.LlamaSharp;
+using Microsoft.KM.TestHelpers;
+using Xunit.Abstractions;
+
+namespace Microsoft.LlamaSharp.FunctionalTests;
+
+/// <summary>
+/// Functional tests for <see cref="LlamaSharpTextEmbeddingGenerator"/>, built from
+/// the EmbeddingModel settings of the LlamaSharp test configuration.
+/// </summary>
+public sealed class LlamaSharpTextEmbeddingGeneratorTest : BaseFunctionalTestCase
+{
+    private readonly LlamaSharpTextEmbeddingGenerator _target;
+
+    public LlamaSharpTextEmbeddingGeneratorTest(
+        IConfiguration cfg,
+        ITestOutputHelper output) : base(cfg, output)
+    {
+        this.LlamaSharpConfig.Validate();
+        this._target = new LlamaSharpTextEmbeddingGenerator(this.LlamaSharpConfig.EmbeddingModel, loggerFactory: null);
+
+        // Report the model that was actually loaded above: the embedding model, not the text model
+        var modelFilename = this.LlamaSharpConfig.EmbeddingModel.ModelPath.Split('/').Last().Split('\\').Last();
+        Console.WriteLine($"Model in use: {modelFilename}");
+    }
+
+    /// <summary>
+    /// Generates an embedding for a short text and checks the length of the returned vector.
+    /// </summary>
+    [Fact]
+    [Trait("Category", "LlamaSharp")]
+    public async Task ItGeneratesEmbeddingVectors()
+    {
+        // Act
+        Embedding embedding = await this._target.GenerateEmbeddingAsync("some text");
+
+        // Assert
+        Console.WriteLine("Embedding size: " + embedding.Length);
+
+        // Expected result using nomic-embed-text-v1.5.Q8_0.gguf
+        Assert.Equal(768, embedding.Length);
+    }
+
+    /// <summary>
+    /// Disposes the generator under test, then lets the base class clean up.
+    /// </summary>
+    protected override void Dispose(bool disposing)
+    {
+        if (disposing)
+        {
+            this._target.Dispose();
+        }
+
+        base.Dispose(disposing);
+    }
+}
diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs
index cfb575aa5..50016dd0a 100644
--- a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs
+++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTextGeneratorTest.cs
@@ -21,8 +21,8 @@ public LlamaSharpTextGeneratorTest(
         this._timer = new Stopwatch();
 
         this.LlamaSharpConfig.Validate();
-        this._target = new LlamaSharpTextGenerator(this.LlamaSharpConfig, loggerFactory: null);
-        var modelFilename = this.LlamaSharpConfig.ModelPath.Split('/').Last().Split('\\').Last();
+        this._target = new LlamaSharpTextGenerator(this.LlamaSharpConfig.TextModel, loggerFactory: null);
+        var modelFilename = this.LlamaSharpConfig.TextModel.ModelPath.Split('/').Last().Split('\\').Last();
         Console.WriteLine($"Model in use: {modelFilename}");
     }
 
diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs
index a78ee0864..50e1c2276 100644
--- a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs
+++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/LlamaSharpTokenizerTest.cs
@@ -18,13 +18,13 @@ public LlamaSharpTokenizerTest(
     {
         this.LlamaSharpConfig.Validate();
 
-        var modelFilename = this.LlamaSharpConfig.ModelPath.Split('/').Last().Split('\\').Last();
+        var modelFilename = this.LlamaSharpConfig.TextModel.ModelPath.Split('/').Last().Split('\\').Last();
         Console.WriteLine($"Model in use: {modelFilename}");
 
-        var parameters = new ModelParams(this.LlamaSharpConfig.ModelPath)
+        var parameters = new ModelParams(this.LlamaSharpConfig.TextModel.ModelPath)
         {
-            ContextSize = this.LlamaSharpConfig.MaxTokenTotal,
-            GpuLayerCount = this.LlamaSharpConfig.GpuLayerCount ?? 20,
+            ContextSize = this.LlamaSharpConfig.TextModel.MaxTokenTotal,
+            GpuLayerCount = this.LlamaSharpConfig.TextModel.GpuLayerCount ?? 20,
         };
 
         LLamaWeights model = LLamaWeights.LoadFromFile(parameters);
diff --git a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json
index f4653ee2d..2c2a73001 100644
--- a/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json
+++ b/extensions/LlamaSharp/LlamaSharp.FunctionalTests/appsettings.json
@@ -6,13 +6,22 @@
     },
     "Services": {
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "SimpleVectorDb": {
         // Options: "Disk" or "Volatile". Volatile data is lost after each execution.
diff --git a/extensions/LlamaSharp/LlamaSharp/DependencyInjection.cs b/extensions/LlamaSharp/LlamaSharp/DependencyInjection.cs
index 62ebe96bb..5b95ebceb 100644
--- a/extensions/LlamaSharp/LlamaSharp/DependencyInjection.cs
+++ b/extensions/LlamaSharp/LlamaSharp/DependencyInjection.cs
@@ -20,23 +20,67 @@ public static IKernelMemoryBuilder WithLlamaTextGeneration(
         uint maxTokenTotal,
         ITextTokenizer? textTokenizer = null)
     {
-        var config = new LlamaSharpConfig
+        var config = new LlamaSharpModelConfig
         {
             ModelPath = modelPath,
             MaxTokenTotal = maxTokenTotal
         };
 
-        builder.Services.AddLlamaTextGeneration(config, textTokenizer);
+        builder.Services.AddLlamaSharpTextGeneration(config, textTokenizer);
+
+        return builder;
+    }
+
+    public static IKernelMemoryBuilder WithLlamaTextEmbeddingGeneration(
+        this IKernelMemoryBuilder builder,
+        string modelPath,
+        uint maxTokenTotal,
+        ITextTokenizer? textTokenizer = null)
+    {
+        var config = new LlamaSharpModelConfig
+        {
+            ModelPath = modelPath,
+            MaxTokenTotal = maxTokenTotal
+        };
+
+        builder.Services.AddLlamaSharpTextEmbeddingGeneration(config, textTokenizer);
 
         return builder;
     }
 
     public static IKernelMemoryBuilder WithLlamaTextGeneration(
+        this IKernelMemoryBuilder builder,
+        LlamaSharpModelConfig config,
+        ITextTokenizer? textTokenizer = null)
+    {
+        builder.Services.AddLlamaSharpTextGeneration(config, textTokenizer);
+        return builder;
+    }
+
+    public static IKernelMemoryBuilder WithLlamaTextEmbeddingGeneration(
+        this IKernelMemoryBuilder builder,
+        LlamaSharpModelConfig config,
+        ITextTokenizer? textTokenizer = null)
+    {
+        builder.Services.AddLlamaSharpTextEmbeddingGeneration(config, textTokenizer);
+        return builder;
+    }
+
+    public static IKernelMemoryBuilder WithLlamaTextGeneration(
+        this IKernelMemoryBuilder builder,
+        LlamaSharpConfig config,
+        ITextTokenizer? textTokenizer = null)
+    {
+        builder.Services.AddLlamaSharpTextGeneration(config.TextModel, textTokenizer);
+        return builder;
+    }
+
+    public static IKernelMemoryBuilder WithLlamaTextEmbeddingGeneration(
         this IKernelMemoryBuilder builder,
         LlamaSharpConfig config,
         ITextTokenizer? textTokenizer = null)
     {
-        builder.Services.AddLlamaTextGeneration(config, textTokenizer);
+        builder.Services.AddLlamaSharpTextEmbeddingGeneration(config.EmbeddingModel, textTokenizer);
         return builder;
     }
 }
@@ -46,9 +90,9 @@ public static IKernelMemoryBuilder WithLlamaTextGeneration(
 /// </summary>
 public static partial class DependencyInjection
 {
-    public static IServiceCollection AddLlamaTextGeneration(
+    public static IServiceCollection AddLlamaSharpTextGeneration(
         this IServiceCollection services,
-        LlamaSharpConfig config,
+        LlamaSharpModelConfig config,
         ITextTokenizer? textTokenizer = null)
     {
         config.Validate();
@@ -58,4 +102,23 @@ public static IServiceCollection AddLlamaTextGeneration(
                 textTokenizer: textTokenizer,
                 loggerFactory: serviceProvider.GetService<ILoggerFactory>()));
     }
+
+    /// <summary>
+    /// Register a singleton <see cref="ITextEmbeddingGenerator"/> implemented by <see cref="LlamaSharpTextEmbeddingGenerator"/>.
+    /// </summary>
+    /// <param name="services">Service collection receiving the registration</param>
+    /// <param name="config">Settings of the embedding model, validated before registering</param>
+    /// <param name="textTokenizer">Optional tokenizer handed to the generator</param>
+    /// <returns>The service collection returned by AddSingleton</returns>
+    public static IServiceCollection AddLlamaSharpTextEmbeddingGeneration(
+        this IServiceCollection services,
+        LlamaSharpModelConfig config,
+        ITextTokenizer? textTokenizer = null)
+    {
+        config.Validate();
+
+        // The factory receives the service provider, from which the optional ILoggerFactory is resolved
+        return services.AddSingleton<ITextEmbeddingGenerator, LlamaSharpTextEmbeddingGenerator>(
+            sp => new LlamaSharpTextEmbeddingGenerator(config, textTokenizer, sp.GetService<ILoggerFactory>()));
+    }
 }
diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpConfig.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpConfig.cs
index c87b7b290..a88a68032 100644
--- a/extensions/LlamaSharp/LlamaSharp/LlamaSharpConfig.cs
+++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpConfig.cs
@@ -1,7 +1,5 @@
 ﻿// Copyright (c) Microsoft. All rights reserved.
 
-using System.IO;
-
 #pragma warning disable IDE0130 // reduce number of "using" statements
 // ReSharper disable once CheckNamespace - reduce number of "using" statements
 namespace Microsoft.KernelMemory;
@@ -9,33 +7,23 @@ namespace Microsoft.KernelMemory;
 public class LlamaSharpConfig
 {
     /// <summary>
-    /// Path to the *.gguf file.
+    /// Settings for the model used for text generation. Chat models can be used too.
     /// </summary>
-    public string ModelPath { get; set; } = "";
+    public LlamaSharpModelConfig TextModel { get; set; } = new();
 
     /// <summary>
-    /// Max number of tokens supported by the model.
+    /// Settings for the model used for text embedding generation.
     /// </summary>
-    public uint MaxTokenTotal { get; set; } = 4096;
-
-    /// <summary>
-    /// Optional, number of GPU layers
-    /// </summary>
-    public int? GpuLayerCount { get; set; }
+    public LlamaSharpModelConfig EmbeddingModel { get; set; } = new();
 
     /// <summary>
     /// Verify that the current state is valid.
     /// </summary>
+    /// <param name="allowIO">When false, the check that the model files exist on disk is skipped.</param>
     public void Validate(bool allowIO = true)
     {
-        if (string.IsNullOrWhiteSpace(this.ModelPath))
-        {
-            throw new ConfigurationException($"LlamaSharp: {nameof(this.ModelPath)} is empty");
-        }
-
-        if (allowIO && !File.Exists(this.ModelPath))
-        {
-            throw new ConfigurationException($"LlamaSharp: {nameof(this.ModelPath)} file not found");
-        }
+        // Forward allowIO: without it Validate(false) would still touch the file system
+        this.TextModel.Validate(allowIO);
+        this.EmbeddingModel.Validate(allowIO);
     }
 }
diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpModelConfig.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpModelConfig.cs
new file mode 100644
index 000000000..c331a4c36
--- /dev/null
+++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpModelConfig.cs
@@ -0,0 +1,54 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System.IO;
+
+#pragma warning disable IDE0130 // reduce number of "using" statements
+// ReSharper disable once CheckNamespace - reduce number of "using" statements
+namespace Microsoft.KernelMemory;
+
+/// <summary>
+/// Settings for one LlamaSharp model: file path, token limit and optional GPU layer count.
+/// </summary>
+public class LlamaSharpModelConfig
+{
+    /// <summary>
+    /// Location of the *.gguf model file.
+    /// </summary>
+    public string ModelPath { get; set; } = "";
+
+    /// <summary>
+    /// Maximum number of tokens the model supports.
+    /// </summary>
+    public uint MaxTokenTotal { get; set; } = 4096;
+
+    /// <summary>
+    /// Number of GPU layers; optional, null when not set.
+    /// </summary>
+    public int? GpuLayerCount { get; set; }
+
+    /// <summary>
+    /// Check that the settings are usable, throwing when they are not.
+    /// </summary>
+    /// <param name="allowIO">When true, also check that the model file exists on disk.</param>
+    /// <exception cref="ConfigurationException">The model path is empty, or the file is missing.</exception>
+    public void Validate(bool allowIO = true)
+    {
+        string path = this.ModelPath;
+
+        if (string.IsNullOrWhiteSpace(path))
+        {
+            throw new ConfigurationException($"LlamaSharp: {nameof(this.ModelPath)} is empty");
+        }
+
+        // Touch the file system only when the caller allows it
+        if (!allowIO)
+        {
+            return;
+        }
+
+        if (!File.Exists(path))
+        {
+            throw new ConfigurationException($"LlamaSharp: {nameof(this.ModelPath)} file not found");
+        }
+    }
+}
diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextEmbeddingGenerator.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextEmbeddingGenerator.cs
new file mode 100644
index 000000000..2478e1565
--- /dev/null
+++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextEmbeddingGenerator.cs
@@ -0,0 +1,102 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using LLama;
+using LLama.Common;
+using LLama.Native;
+using Microsoft.Extensions.Logging;
+using Microsoft.KernelMemory.Diagnostics;
+
+namespace Microsoft.KernelMemory.AI.LlamaSharp;
+
+/// <summary>
+/// Text embedding generator that loads a local model file through LLamaSharp.
+/// </summary>
+public sealed class LlamaSharpTextEmbeddingGenerator : ITextEmbeddingGenerator, IDisposable
+{
+    private readonly LLamaEmbedder _embedder;
+    private readonly LLamaWeights _model;
+    private readonly LLamaContext _context;
+    private readonly ITextTokenizer _textTokenizer;
+    private readonly ILogger<LlamaSharpTextEmbeddingGenerator> _log;
+
+    /// <summary>
+    /// Create new instance
+    /// </summary>
+    /// <param name="config">Configuration settings</param>
+    /// <param name="textTokenizer">Optional text tokenizer, replacing the one provided by the model</param>
+    /// <param name="loggerFactory">Application logger instance</param>
+    public LlamaSharpTextEmbeddingGenerator(
+        LlamaSharpModelConfig config,
+        ITextTokenizer? textTokenizer = null,
+        ILoggerFactory? loggerFactory = null)
+    {
+        // Log under this class' own category rather than the text generator's
+        this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger<LlamaSharpTextEmbeddingGenerator>();
+
+        config.Validate();
+        this.MaxTokens = (int)config.MaxTokenTotal;
+
+        var parameters = new ModelParams(config.ModelPath)
+        {
+            ContextSize = config.MaxTokenTotal,
+            // Fall back to 20 GPU layers when the setting is not provided
+            GpuLayerCount = config.GpuLayerCount ?? 20,
+            Embeddings = true,
+            PoolingType = LLamaPoolingType.None,
+        };
+
+        var modelFilename = config.ModelPath.Split('/').Last().Split('\\').Last();
+        this._log.LogDebug("Loading LLama model: {0}", modelFilename);
+
+        this._model = LLamaWeights.LoadFromFile(parameters);
+        this._context = this._model.CreateContext(parameters);
+        this._log.LogDebug("LLama model loaded");
+
+        this._embedder = new LLamaEmbedder(this._model, parameters);
+        // Unless a tokenizer is supplied, tokenize with the context created above
+        this._textTokenizer = textTokenizer ?? new LlamaSharpTokenizer(this._context);
+    }
+
+    /// <inheritdoc/>
+    public int MaxTokens { get; }
+
+    /// <inheritdoc/>
+    public int CountTokens(string text)
+    {
+        return this._textTokenizer.CountTokens(text);
+    }
+
+    /// <inheritdoc/>
+    public IReadOnlyList<string> GetTokens(string text)
+    {
+        return this._textTokenizer.GetTokens(text);
+    }
+
+    /// <inheritdoc/>
+    public async Task<Embedding> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
+    {
+        // Tokens are counted only when trace logging is enabled
+        if (this._log.IsEnabled(LogLevel.Trace))
+        {
+            this._log.LogTrace("Generating embedding, input token size: {0}, text length: {1}", this._textTokenizer.CountTokens(text), text.Length);
+        }
+
+        // NOTE(review): only the first vector returned by the embedder is used; confirm that is the intended one with PoolingType.None
+        IReadOnlyList<float[]> embeddings = await this._embedder.GetEmbeddings(text, cancellationToken).ConfigureAwait(false);
+        return new Embedding(embeddings[0]);
+    }
+
+    /// <inheritdoc/>
+    public void Dispose()
+    {
+        // Release the embedder and the context before the model weights they were created from
+        this._embedder.Dispose();
+        this._context.Dispose();
+        this._model.Dispose();
+    }
+}
diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs
index 67c0e1ae4..a0026b3c4 100644
--- a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs
+++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs
@@ -34,7 +34,7 @@ public sealed class LlamaSharpTextGenerator : ITextGenerator, IDisposable
     /// <param name="textTokenizer">Optional text tokenizer, replacing the one provided by the model</param>
     /// <param name="loggerFactory">Application logger instance</param>
     public LlamaSharpTextGenerator(
-        LlamaSharpConfig config,
+        LlamaSharpModelConfig config,
         ITextTokenizer? textTokenizer = null,
         ILoggerFactory? loggerFactory = null)
     {
diff --git a/service/Service/ServiceConfiguration.cs b/service/Service/ServiceConfiguration.cs
index 69db00e82..65c638c6a 100644
--- a/service/Service/ServiceConfiguration.cs
+++ b/service/Service/ServiceConfiguration.cs
@@ -244,6 +244,15 @@ private void ConfigureIngestionEmbeddingGenerators(IKernelMemoryBuilder builder)
                     break;
                 }
 
+                case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase):
+                {
+                    var instance = this.GetServiceInstance<ITextEmbeddingGenerator>(builder,
+                        s => s.AddLlamaSharpTextEmbeddingGeneration(
+                            config: this.GetServiceConfig<LlamaSharpConfig>("LlamaSharp").EmbeddingModel));
+                    builder.AddIngestionEmbeddingGenerator(instance);
+                    break;
+                }
+
                 default:
                     // NOOP - allow custom implementations, via WithCustomEmbeddingGeneration()
                     break;
@@ -395,6 +404,11 @@ private void ConfigureRetrievalEmbeddingGenerator(IKernelMemoryBuilder builder)
                     textTokenizer: new GPT4oTokenizer());
                 break;
 
+            case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase):
+                builder.Services.AddLlamaSharpTextEmbeddingGeneration(
+                    config: this.GetServiceConfig<LlamaSharpConfig>("LlamaSharp").EmbeddingModel);
+                break;
+
             default:
                 // NOOP - allow custom implementations, via WithCustomEmbeddingGeneration()
                 break;
@@ -479,7 +493,8 @@ private void ConfigureTextGenerator(IKernelMemoryBuilder builder)
                 break;
 
             case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase):
-                builder.Services.AddLlamaTextGeneration(this.GetServiceConfig<LlamaSharpConfig>("LlamaSharp"));
+                builder.Services.AddLlamaSharpTextGeneration(
+                    config: this.GetServiceConfig<LlamaSharpConfig>("LlamaSharp").TextModel);
                 break;
 
             default:
diff --git a/service/Service/appsettings.json b/service/Service/appsettings.json
index e19971b5c..d132ed4b6 100644
--- a/service/Service/appsettings.json
+++ b/service/Service/appsettings.json
@@ -383,13 +383,22 @@
         "Replicas": 0
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "MongoDbAtlas": {
         "ConnectionString": "mongodb://root:root@localhost:27777/?authSource=admin",
diff --git a/service/tests/Core.FunctionalTests/appsettings.json b/service/tests/Core.FunctionalTests/appsettings.json
index bcfbb4103..2b0e90b31 100644
--- a/service/tests/Core.FunctionalTests/appsettings.json
+++ b/service/tests/Core.FunctionalTests/appsettings.json
@@ -15,13 +15,22 @@
         "UseHybridSearch": false
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       },
       "MongoDbAtlas": {
         "ConnectionString": "mongodb://localhost:27017/?directConnection=true&serverSelectionTimeoutMS=2000",
diff --git a/service/tests/Service.FunctionalTests/appsettings.json b/service/tests/Service.FunctionalTests/appsettings.json
index b0cfcd5af..0d823d268 100644
--- a/service/tests/Service.FunctionalTests/appsettings.json
+++ b/service/tests/Service.FunctionalTests/appsettings.json
@@ -42,13 +42,22 @@
         "APIKey": ""
       },
       "LlamaSharp": {
-        // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
-        "ModelPath": "",
-        // Max number of tokens supported by the model
-        "MaxTokenTotal": 4096
-        // Optional parameters
-        // "GpuLayerCount": 32,
-        // "Seed": 1337,
+        "TextModel": {
+          // path to file, e.g. "llama-2-7b-chat.Q6_K.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        },
+        "EmbeddingModel": {
+          // path to file, e.g. "nomic-embed-text-v1.5.Q8_0.gguf"
+          "ModelPath": "",
+          // Max number of tokens supported by the model
+          "MaxTokenTotal": 4096
+          // Optional parameters
+          // "GpuLayerCount": 32,
+        }
       }
     }
   },