@@ -75,13 +75,20 @@ struct index_params : cuvs::neighbors::index_params {
7575 uint32_t pq_train_iters = 10 ;
7676
7777 /* * whether to apply bf16 quantization of dataset vectors **/
78- bool reordering_bf16_enabled = false ;
79-
80- /* * Threshold for computing AVQ eta va Theorem 3.4 in https://arxiv.org/abs/1908.10396
78+ bool reordering_bf16 = false ;
79+
80+ /* * Threshold T for computing AVQ eta = (dim - 1) ( T^2 / || x ||^2) / ( 1 - T^2 / || x ||^2)
81+ *
82+ * When quantizing a vector x to x_q, AVQ minimizes the loss function
83+ * L(x, x_q) = eta * || r_para ||^2 + || r_perp ||^2, where
84+ * r = x - x_q, r_para = <r, x> * x / || x ||^2, r_perp = r - r_para
85+ *
86+ * Compared to L2 loss, this produces an x_q which better approximates
87+ * the dot product of a query vector with x
88+ *
8189 * If the threshold is NAN, AVQ is not performed during bfloat16 quant
8290 */
8391 float reordering_noise_shaping_threshold = NAN;
84- // TODO - add other scann build params
8592};
8693
8794/* *
@@ -141,7 +148,7 @@ struct index : cuvs::neighbors::index {
141148 IdxT dim,
142149 uint32_t pq_clusters,
143150 uint32_t pq_num_subspaces,
144- bool reordering_bf16_enabled )
151+ bool reordering_bf16 )
145152 : cuvs::neighbors::index(),
146153 metric_ (metric),
147154 pq_dim_(pq_dim),
@@ -159,7 +166,7 @@ struct index : cuvs::neighbors::index {
159166 n_rows_(n_rows),
160167 dim_(dim),
161168 bf16_dataset_(raft::make_host_matrix<int16_t , IdxT, raft::row_major>(
162- reordering_bf16_enabled ? n_rows : 0 , reordering_bf16_enabled ? dim : 0 ))
169+ reordering_bf16 ? n_rows : 0 , reordering_bf16 ? dim : 0 ))
163170
164171 {
165172 }
@@ -174,7 +181,7 @@ struct index : cuvs::neighbors::index {
174181 dim,
175182 1 << params.pq_bits,
176183 dim / params.pq_dim,
177- params.reordering_bf16_enabled )
184+ params.reordering_bf16 )
178185 {
179186 RAFT_EXPECTS (params.pq_bits == 4 || params.pq_bits == 8 , " ScaNN only supports 4 or 8 bit PQ" );
180187 RAFT_EXPECTS (dim >= params.pq_dim ,
0 commit comments