forked from SciSharp/LLamaSharp
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathLLamaModelQuantizeParams.cs
More file actions
104 lines (91 loc) · 3.01 KB
/
LLamaModelQuantizeParams.cs
File metadata and controls
104 lines (91 loc) · 3.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
using System;
using System.Runtime.InteropServices;
namespace LLama.Native
{
/// <summary>
/// Quantizer parameters used in the native API.
/// </summary>
/// <remarks>
/// Maps 1:1 onto the native <c>llama_model_quantize_params</c> struct; field order and
/// storage types must not be changed (the layout is consumed by native code).
/// Boolean flags are stored as <see cref="sbyte"/> backing fields to match the native
/// single-byte <c>bool</c> representation, and exposed through <see cref="bool"/> properties.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaModelQuantizeParams
{
    /// <summary>
    /// number of threads to use for quantizing, if &lt;= 0 will use std::thread::hardware_concurrency()
    /// </summary>
    public int nthread;

    /// <summary>
    /// quantize to this llama_ftype
    /// </summary>
    public LLamaFtype ftype;

    /// <summary>
    /// output tensor type
    /// </summary>
    public GGMLType output_tensor_type;

    /// <summary>
    /// token embeddings tensor type
    /// </summary>
    public GGMLType token_embedding_type;

    /// <summary>
    /// allow quantizing non-f32/f16 tensors
    /// </summary>
    public bool allow_requantize
    {
        get => Convert.ToBoolean(_allow_requantize);
        set => _allow_requantize = Convert.ToSByte(value);
    }
    private sbyte _allow_requantize;

    /// <summary>
    /// quantize output.weight
    /// </summary>
    public bool quantize_output_tensor
    {
        get => Convert.ToBoolean(_quantize_output_tensor);
        set => _quantize_output_tensor = Convert.ToSByte(value);
    }
    private sbyte _quantize_output_tensor;

    /// <summary>
    /// only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
    /// </summary>
    public bool only_copy
    {
        get => Convert.ToBoolean(_only_copy);
        set => _only_copy = Convert.ToSByte(value);
    }
    private sbyte _only_copy;

    /// <summary>
    /// quantize all tensors to the default type
    /// </summary>
    public bool pure
    {
        get => Convert.ToBoolean(_pure);
        set => _pure = Convert.ToSByte(value);
    }
    private sbyte _pure;

    /// <summary>
    /// quantize to the same number of shards
    /// </summary>
    public bool keep_split
    {
        get => Convert.ToBoolean(_keep_split);
        set => _keep_split = Convert.ToSByte(value);
    }
    private sbyte _keep_split;

    /// <summary>
    /// pointer to importance matrix data
    /// </summary>
    public IntPtr imatrix;

    /// <summary>
    /// pointer to vector containing overrides
    /// </summary>
    public IntPtr kv_overrides;

    /// <summary>
    /// Create a LLamaModelQuantizeParams with default values, as supplied by the
    /// native library.
    /// </summary>
    /// <returns>A <see cref="LLamaModelQuantizeParams"/> populated with the native defaults.</returns>
    public static LLamaModelQuantizeParams Default()
    {
        return llama_model_quantize_default_params();

        // Local extern function keeps the P/Invoke declaration scoped to its only caller.
        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
        static extern LLamaModelQuantizeParams llama_model_quantize_default_params();
    }
}
}