Skip to content

Commit 380a4b4

Browse files
committed
llama: add new experimental context params from llama.cpp
Signed-off-by: deadprogram <ron@hybridgroup.com>
1 parent e864fdb commit 380a4b4

2 files changed

Lines changed: 7 additions & 1 deletion

File tree

pkg/llama/context.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,8 @@ var FFITypeContextParams = ffi.NewType(
2323
&ffi.TypePointer, &ffi.TypePointer,
2424
&ffi.TypeUint8, &ffi.TypeUint8,
2525
&ffi.TypeUint8, &ffi.TypeUint8,
26-
&ffi.TypeUint8, &ffi.TypeUint8)
26+
&ffi.TypeUint8, &ffi.TypeUint8,
27+
&ffi.TypeUint32, &ffi.TypeSint32)
2728

2829
var (
2930
// LLAMA_API struct llama_context_params llama_context_default_params(void);

pkg/llama/llama.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -341,6 +341,11 @@ type ContextParams struct {
341341
OpOffload uint8 // offload host tensor operations to device
342342
SwaFull uint8 // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
343343
KVUnified uint8 // use a unified buffer across the input sequences when computing the attentions
344+
// [EXPERIMENTAL]
345+
// backend sampler chain configuration (make sure the caller keeps the sampler chains alive)
346+
// note: the samplers must be sampler chains (i.e. use llama_sampler_chain_init)
347+
Samplers uintptr // llama_sampler_seq_config *
348+
NSamplers uint32 // number of sampler chains
344349
}
345350

346351
// Model quantize parameters

0 commit comments

Comments
 (0)