We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 31aa045 commit 1cdc886 Copy full SHA for 1cdc886
csrc/quantization/gguf/vecdotq.cuh
@@ -37,6 +37,8 @@ static __device__ __forceinline__ int get_int_from_uint8_aligned(const uint8_t *
37
return *((const int *) (x8 + sizeof(int) * i32)); // assume at least 4 byte alignment
38
}
39
40
+// VDR = vec dot ratio: the number of contiguous integers each thread processes per call to the vec dot kernel
41
+// MMVQ = mul_mat_vec_q, MMQ = mul_mat_q
42
43
#define VDR_Q4_0_Q8_1_MMVQ 2
44
#define VDR_Q4_0_Q8_1_MMQ 4
0 commit comments