Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion LLama.Examples/ExampleRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ public class ExampleRunner
{ "Batched Executor: Save/Load", BatchedExecutorSaveAndLoad.Run },
{ "Batched Executor: Fork", BatchedExecutorFork.Run },
{ "Batched Executor: Rewind", BatchedExecutorRewind.Run },
{ "Batched Executor: Guidance", BatchedExecutorGuidance.Run },
{ "Batched Executor: LLava", BatchedExecutorLLava.Run },
{ "Batched Executor: BoolQ Benchmark", BatchedExecutorBoolQ.Run },
{ "Batched Executor: Beam Search", BatchedExecutorBeamSearch.Run },
Expand Down
12 changes: 7 additions & 5 deletions LLama.Examples/Examples/BatchedExecutorBeamSearch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ from beam in oldBeam.Sample(beamsCount)
while (beams.Count > beamsCount)
{
var beam = beams[0];
AnsiConsole.MarkupLineInterpolated($"[red]Culling Beam {beam.Conversation.ConversationId} (prob:{beam.CumulativeProbability:P10})[/]: {beam}");

var text = beam.ToString().EscapeMarkup();
AnsiConsole.MarkupLine($"[red]Culling Beam {beam.Conversation.ConversationId} (prob:{beam.CumulativeProbability:P5})[/]: {text}");

beam.Dispose();
beams.RemoveAt(0);
Expand Down Expand Up @@ -121,7 +123,7 @@ public List<Beam> Sample(int nbeams)
{
// Apply softmax, this calculates probabilities and sorts tokens into descending order
var logitsArr = LLamaTokenDataArray.Create(Conversation.Sample());
logitsArr.Softmax(Conversation.Executor.Context.NativeHandle);
logitsArr.Softmax();

// Create new forked conversations, one for each beam
var results = new List<Beam>();
Expand All @@ -135,14 +137,14 @@ public List<Beam> Sample(int nbeams)
var c = Conversation.Fork();

// Extend the conversation with the selected token.
c.Prompt(item.id);
c.Prompt(item.ID);

// Keep track of the cumulative probability of this entire sequence.
var p = CumulativeProbability * item.p;
var p = CumulativeProbability * item.Probability;

// Keep track of all tokens in this sequence, for decoding later
var t = Tokens.ToList();
t.Add(item.id);
t.Add(item.ID);

// Keep track of which beam this beam was derived from.
var s = Sequence.ToList();
Expand Down
27 changes: 13 additions & 14 deletions LLama.Examples/Examples/BatchedExecutorBoolQ.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using System.Text;
using LLama.Batched;
using LLama.Common;
using LLama.Grammars;
using LLama.Native;
using Spectre.Console;
using LLama.Sampling;
Expand All @@ -10,6 +9,9 @@ namespace LLama.Examples.Examples;

public class BatchedExecutorBoolQ
{
// Answers may start with a space, and then must produce one of the listed strings followed by a newline character and nothing else.
private static readonly Grammar AnswerGrammar = new("root ::= (\" \")? (\"true\" | \"false\" | \"yes\" | \"no\") \"\\n\"", "root");

public static async Task Run()
{
// Load model weights
Expand All @@ -21,9 +23,6 @@ public static async Task Run()
var sys = AnsiConsole.Ask("System prompt", "Answer the question with a single word answer.");
var hint = AnsiConsole.Ask("Provide hints to model (test reading comprehension instead of knowledge)", true);

// Answers may start with a space, and then must produce one of the listed strings followed by a newline character and nothing else.
var grammar = Grammar.Parse("root ::= (\" \")? (\"true\" | \"false\" | \"yes\" | \"no\") \"\\n\"", "root");

// Create an executor that can evaluate a batch of conversations together
using var executor = new BatchedExecutor(model, parameters);

Expand Down Expand Up @@ -53,7 +52,7 @@ await AnsiConsole.Progress()

foreach (var chunk in chunks)
{
var result = await RunBatch(executor, tokensGenerate, grammar, sys, hint, chunk);
var result = await RunBatch(executor, tokensGenerate, sys, hint, chunk);
results.Add(result);

reporter.Increment(1);
Expand Down Expand Up @@ -87,10 +86,10 @@ await AnsiConsole.Progress()
}
}

private static async Task<BatchResult> RunBatch(BatchedExecutor executor, int maxTokens, Grammar grammar, string sys, bool hint, IEnumerable<(string, bool, string)> batch)
private static async Task<BatchResult> RunBatch(BatchedExecutor executor, int maxTokens, string sys, bool hint, IEnumerable<(string, bool, string)> batch)
{
var conversations = (from item in batch
select new ConversationRunner(executor, grammar, sys, item.Item1, item.Item2, hint ? item.Item3 : null)).ToArray();
select new ConversationRunner(executor, sys, item.Item1, item.Item2, hint ? item.Item3 : null)).ToArray();

for (var i = 0; i < maxTokens; i++)
{
Expand Down Expand Up @@ -135,6 +134,9 @@ private record BatchResult(int TruePositive, int TrueNegative, int FalsePositive
public float Accuracy => (float)Correct / Total;
}

/// <summary>
/// All of the mechanics necessary to run a conversation to answer a single question
/// </summary>
private class ConversationRunner
: IDisposable
{
Expand All @@ -149,14 +151,11 @@ private class ConversationRunner
public string Question { get; }
public bool Answer { get; }

public ConversationRunner(BatchedExecutor executor, Grammar grammar, string sys, string question, bool answer, string? hint)
public ConversationRunner(BatchedExecutor executor, string sys, string question, bool answer, string? hint)
{
_executor = executor;
_decoder = new StreamingTokenDecoder(executor.Context);
_sampler = new GreedySamplingPipeline
{
Grammar = grammar.CreateInstance(),
};
_sampler = new GreedySamplingPipeline { Grammar = AnswerGrammar };

// Make sure question ends with question mark
if (!question.EndsWith('?'))
Expand Down Expand Up @@ -192,7 +191,7 @@ public void Sample()
if (!_conversation.RequiresSampling)
return;

var token = _sampler.Sample(_executor.Context.NativeHandle, _conversation.Sample(), []);
var token = _sampler.Sample(_executor.Context, _conversation.GetSampleIndex());

var tokens = _executor.Context.NativeHandle.ModelHandle.Tokens;
if (tokens.IsEndOfGeneration(token) || tokens.Newline == token)
Expand All @@ -216,7 +215,7 @@ public void Prompt()
var token = _sampledToken.Value;
_sampledToken = default;

_sampler.Accept(_executor.Context.NativeHandle, token);
_sampler.Accept(token);
_decoder.Add(token);
_conversation.Prompt(token);
}
Expand Down
6 changes: 1 addition & 5 deletions LLama.Examples/Examples/BatchedExecutorFork.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
using Spectre.Console;

Expand Down Expand Up @@ -77,9 +76,7 @@ await AnsiConsole
// Print some stats
var timings = executor.Context.NativeHandle.GetTimings();
AnsiConsole.MarkupLine($"Total Tokens Evaluated: {timings.TokensEvaluated}");
AnsiConsole.MarkupLine($"Total Tokens Sampled: {timings.TokensSampled}");
AnsiConsole.MarkupLine($"Eval Time: {(timings.Eval + timings.PromptEval).TotalMilliseconds}ms");
AnsiConsole.MarkupLine($"Sample Time: {timings.Sampling.TotalMilliseconds}ms");
}

private class Node
Expand Down Expand Up @@ -114,8 +111,7 @@ public void Sample()

// Sample one token
var ctx = _conversation.Executor.Context.NativeHandle;
var token = _sampler.Sample(ctx, _conversation.Sample(), Array.Empty<LLamaToken>());
_sampler.Accept(ctx, token);
var token = _sampler.Sample(ctx, _conversation.GetSampleIndex());
_decoder.Add(token);

// Prompt the conversation with this token, to continue generating from there
Expand Down
123 changes: 0 additions & 123 deletions LLama.Examples/Examples/BatchedExecutorGuidance.cs

This file was deleted.

2 changes: 1 addition & 1 deletion LLama.Examples/Examples/BatchedExecutorLLava.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ await AnsiConsole

await executor.Infer();

var token = sampler.Sample(executor.Context.NativeHandle, conversation.Sample(), Array.Empty<LLamaToken>());
var token = sampler.Sample(executor.Context.NativeHandle, conversation.GetSampleIndex());
if (executor.Context.NativeHandle.ModelHandle.Tokens.IsEndOfGeneration(token))
break;

Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/Examples/BatchedExecutorRewind.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ private class Node

public LLamaToken Sample(Conversation conversation)
{
var token = _sampler.Sample(conversation.Executor.Context.NativeHandle, conversation.Sample(), []);
var token = _sampler.Sample(conversation.Executor.Context.NativeHandle, conversation.GetSampleIndex());
_tokens.Add(token);
return token;
}
Expand Down
4 changes: 2 additions & 2 deletions LLama.Examples/Examples/BatchedExecutorSaveAndLoad.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using LLama.Batched;
using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
Expand Down Expand Up @@ -94,7 +94,7 @@ private static async Task<LLamaToken> GenerateTokens(BatchedExecutor executor, C
await executor.Infer();

// Use sampling pipeline to pick a token
token = sampler.Sample(executor.Context.NativeHandle, conversation.Sample(), ReadOnlySpan<LLamaToken>.Empty);
token = sampler.Sample(executor.Context.NativeHandle, conversation.GetSampleIndex());

// Add it to the decoder, so it can be converted into text later
decoder.Add(token);
Expand Down
3 changes: 1 addition & 2 deletions LLama.Examples/Examples/ChatChineseGB2312.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System.Text;
using System.Text;
using LLama.Common;

namespace LLama.Examples.Examples;
Expand Down Expand Up @@ -27,7 +27,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5,
Encoding = Encoding.UTF8
};
Expand Down
1 change: 0 additions & 1 deletion LLama.Examples/Examples/ChatSessionStripRoleName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);
Expand Down
1 change: 0 additions & 1 deletion LLama.Examples/Examples/ChatSessionWithHistory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);
Expand Down
1 change: 0 additions & 1 deletion LLama.Examples/Examples/ChatSessionWithRestart.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);
Expand Down
1 change: 0 additions & 1 deletion LLama.Examples/Examples/ChatSessionWithRoleName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ public static async Task Run()

var parameters = new ModelParams(modelPath)
{
Seed = 1337,
GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);
Expand Down
Loading