Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion LLama.Examples/ExampleRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ public class ExampleRunner
{ "Batched Executor: Save/Load", BatchedExecutorSaveAndLoad.Run },
{ "Batched Executor: Fork", BatchedExecutorFork.Run },
{ "Batched Executor: Rewind", BatchedExecutorRewind.Run },
{ "Batched Executor: Guidance", BatchedExecutorGuidance.Run },
{ "Batched Executor: LLava", BatchedExecutorLLava.Run },
{ "Batched Executor: BoolQ Benchmark", BatchedExecutorBoolQ.Run },
{ "Batched Executor: Beam Search", BatchedExecutorBeamSearch.Run },
Expand Down
12 changes: 7 additions & 5 deletions LLama.Examples/Examples/BatchedExecutorBeamSearch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ from beam in oldBeam.Sample(beamsCount)
while (beams.Count > beamsCount)
{
var beam = beams[0];
AnsiConsole.MarkupLineInterpolated($"[red]Culling Beam {beam.Conversation.ConversationId} (prob:{beam.CumulativeProbability:P10})[/]: {beam}");

var text = beam.ToString().EscapeMarkup();
AnsiConsole.MarkupLine($"[red]Culling Beam {beam.Conversation.ConversationId} (prob:{beam.CumulativeProbability:P5})[/]: {text}");

beam.Dispose();
beams.RemoveAt(0);
Expand Down Expand Up @@ -121,7 +123,7 @@ public List<Beam> Sample(int nbeams)
{
// Apply softmax, this calculates probabilities and sorts tokens into descending order
var logitsArr = LLamaTokenDataArray.Create(Conversation.Sample());
logitsArr.Softmax(Conversation.Executor.Context.NativeHandle);
logitsArr.Softmax();

// Create new forked conversations, one for each beam
var results = new List<Beam>();
Expand All @@ -135,14 +137,14 @@ public List<Beam> Sample(int nbeams)
var c = Conversation.Fork();

// Extend the conversation with the selected token.
c.Prompt(item.id);
c.Prompt(item.ID);

// Keep track of the cumulative probability of this entire sequence.
var p = CumulativeProbability * item.p;
var p = CumulativeProbability * item.Probability;

// Keep track of all tokens in this sequence, for decoding later
var t = Tokens.ToList();
t.Add(item.id);
t.Add(item.ID);

// Keep track of which beam this beam was derived from.
var s = Sequence.ToList();
Expand Down
27 changes: 13 additions & 14 deletions LLama.Examples/Examples/BatchedExecutorBoolQ.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using System.Text;
using LLama.Batched;
using LLama.Common;
using LLama.Grammars;
using LLama.Native;
using Spectre.Console;
using LLama.Sampling;
Expand All @@ -10,6 +9,9 @@ namespace LLama.Examples.Examples;

public class BatchedExecutorBoolQ
{
// Answers may start with a space, and then must produce one of the listed strings followed by a newline character and nothing else.
private static readonly Grammar AnswerGrammar = new("root ::= (\" \")? (\"true\" | \"false\" | \"yes\" | \"no\") \"\\n\"", "root");

public static async Task Run()
{
// Load model weights
Expand All @@ -21,9 +23,6 @@ public static async Task Run()
var sys = AnsiConsole.Ask("System prompt", "Answer the question with a single word answer.");
var hint = AnsiConsole.Ask("Provide hints to model (test reading comprehension instead of knowledge)", true);

// Answers may start with a space, and then must produce one of the listed strings followed by a newline character and nothing else.
var grammar = Grammar.Parse("root ::= (\" \")? (\"true\" | \"false\" | \"yes\" | \"no\") \"\\n\"", "root");

// Create an executor that can evaluate a batch of conversations together
using var executor = new BatchedExecutor(model, parameters);

Expand Down Expand Up @@ -53,7 +52,7 @@ await AnsiConsole.Progress()

foreach (var chunk in chunks)
{
var result = await RunBatch(executor, tokensGenerate, grammar, sys, hint, chunk);
var result = await RunBatch(executor, tokensGenerate, sys, hint, chunk);
results.Add(result);

reporter.Increment(1);
Expand Down Expand Up @@ -87,10 +86,10 @@ await AnsiConsole.Progress()
}
}

private static async Task<BatchResult> RunBatch(BatchedExecutor executor, int maxTokens, Grammar grammar, string sys, bool hint, IEnumerable<(string, bool, string)> batch)
private static async Task<BatchResult> RunBatch(BatchedExecutor executor, int maxTokens, string sys, bool hint, IEnumerable<(string, bool, string)> batch)
{
var conversations = (from item in batch
select new ConversationRunner(executor, grammar, sys, item.Item1, item.Item2, hint ? item.Item3 : null)).ToArray();
select new ConversationRunner(executor, sys, item.Item1, item.Item2, hint ? item.Item3 : null)).ToArray();

for (var i = 0; i < maxTokens; i++)
{
Expand Down Expand Up @@ -135,6 +134,9 @@ private record BatchResult(int TruePositive, int TrueNegative, int FalsePositive
public float Accuracy => (float)Correct / Total;
}

/// <summary>
/// All of the mechanics necessary to run a conversation to answer a single question
/// </summary>
private class ConversationRunner
: IDisposable
{
Expand All @@ -149,14 +151,11 @@ private class ConversationRunner
public string Question { get; }
public bool Answer { get; }

public ConversationRunner(BatchedExecutor executor, Grammar grammar, string sys, string question, bool answer, string? hint)
public ConversationRunner(BatchedExecutor executor, string sys, string question, bool answer, string? hint)
{
_executor = executor;
_decoder = new StreamingTokenDecoder(executor.Context);
_sampler = new GreedySamplingPipeline
{
Grammar = grammar.CreateInstance(),
};
_sampler = new GreedySamplingPipeline { Grammar = AnswerGrammar };

// Make sure question ends with question mark
if (!question.EndsWith('?'))
Expand Down Expand Up @@ -192,7 +191,7 @@ public void Sample()
if (!_conversation.RequiresSampling)
return;

var token = _sampler.Sample(_executor.Context.NativeHandle, _conversation.Sample(), []);
var token = _sampler.Sample(_executor.Context, _conversation.GetSampleIndex());

var tokens = _executor.Context.NativeHandle.ModelHandle.Tokens;
if (tokens.IsEndOfGeneration(token) || tokens.Newline == token)
Expand All @@ -216,7 +215,7 @@ public void Prompt()
var token = _sampledToken.Value;
_sampledToken = default;

_sampler.Accept(_executor.Context.NativeHandle, token);
_sampler.Accept(token);
_decoder.Add(token);
_conversation.Prompt(token);
}
Expand Down
6 changes: 1 addition & 5 deletions LLama.Examples/Examples/BatchedExecutorFork.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
using Spectre.Console;

Expand Down Expand Up @@ -77,9 +76,7 @@ await AnsiConsole
// Print some stats
var timings = executor.Context.NativeHandle.GetTimings();
AnsiConsole.MarkupLine($"Total Tokens Evaluated: {timings.TokensEvaluated}");
AnsiConsole.MarkupLine($"Total Tokens Sampled: {timings.TokensSampled}");
AnsiConsole.MarkupLine($"Eval Time: {(timings.Eval + timings.PromptEval).TotalMilliseconds}ms");
AnsiConsole.MarkupLine($"Sample Time: {timings.Sampling.TotalMilliseconds}ms");
}

private class Node
Expand Down Expand Up @@ -114,8 +111,7 @@ public void Sample()

// Sample one token
var ctx = _conversation.Executor.Context.NativeHandle;
var token = _sampler.Sample(ctx, _conversation.Sample(), Array.Empty<LLamaToken>());
_sampler.Accept(ctx, token);
var token = _sampler.Sample(ctx, _conversation.GetSampleIndex());
_decoder.Add(token);

// Prompt the conversation with this token, to continue generating from there
Expand Down
123 changes: 0 additions & 123 deletions LLama.Examples/Examples/BatchedExecutorGuidance.cs

This file was deleted.

2 changes: 1 addition & 1 deletion LLama.Examples/Examples/BatchedExecutorLLava.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ await AnsiConsole

await executor.Infer();

var token = sampler.Sample(executor.Context.NativeHandle, conversation.Sample(), Array.Empty<LLamaToken>());
var token = sampler.Sample(executor.Context.NativeHandle, conversation.GetSampleIndex());
if (executor.Context.NativeHandle.ModelHandle.Tokens.IsEndOfGeneration(token))
break;

Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/Examples/BatchedExecutorRewind.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ private class Node

public LLamaToken Sample(Conversation conversation)
{
var token = _sampler.Sample(conversation.Executor.Context.NativeHandle, conversation.Sample(), []);
var token = _sampler.Sample(conversation.Executor.Context.NativeHandle, conversation.GetSampleIndex());
_tokens.Add(token);
return token;
}
Expand Down
4 changes: 2 additions & 2 deletions LLama.Examples/Examples/BatchedExecutorSaveAndLoad.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using LLama.Batched;
using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
Expand Down Expand Up @@ -94,7 +94,7 @@ private static async Task<LLamaToken> GenerateTokens(BatchedExecutor executor, C
await executor.Infer();

// Use sampling pipeline to pick a token
token = sampler.Sample(executor.Context.NativeHandle, conversation.Sample(), ReadOnlySpan<LLamaToken>.Empty);
token = sampler.Sample(executor.Context.NativeHandle, conversation.GetSampleIndex());

// Add it to the decoder, so it can be converted into text later
decoder.Add(token);
Expand Down
3 changes: 1 addition & 2 deletions LLama.Examples/Examples/ChatChineseGB2312.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System.Text;
using System.Text;
using LLama.Common;

namespace LLama.Examples.Examples;
Expand Down Expand Up @@ -27,7 +27,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5,
Encoding = Encoding.UTF8
};
Expand Down
1 change: 0 additions & 1 deletion LLama.Examples/Examples/ChatSessionStripRoleName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);
Expand Down
1 change: 0 additions & 1 deletion LLama.Examples/Examples/ChatSessionWithHistory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);
Expand Down
1 change: 0 additions & 1 deletion LLama.Examples/Examples/ChatSessionWithRestart.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);
Expand Down
1 change: 0 additions & 1 deletion LLama.Examples/Examples/ChatSessionWithRoleName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);
Expand Down
Loading