Skip to content

Commit 89a2ec7

Browse files
committed
Comments + add simple bfloat16 w/ AVQ test
1 parent 8824bcd commit 89a2ec7

5 files changed

Lines changed: 34 additions & 16 deletions

File tree

cpp/include/cuvs/neighbors/scann.hpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,20 @@ struct index_params : cuvs::neighbors::index_params {
7575
uint32_t pq_train_iters = 10;
7676

7777
/** whether to apply bf16 quantization of dataset vectors **/
78-
bool reordering_bf16_enabled = false;
79-
80-
/** Threshold for computing AVQ eta via Theorem 3.4 in https://arxiv.org/abs/1908.10396
78+
bool reordering_bf16 = false;
79+
80+
/** Threshold T for computing AVQ eta = (dim - 1) ( T^2 / || x ||^2) / ( 1 - T^2 / || x ||^2)
81+
*
82+
* When quantizing a vector x to x_q, AVQ minimizes the loss function
83+
* L(x, x_q) = eta * || r_para ||^2 + || r_perp ||^2, where
84+
* r = x - x_q, r_para = <r, x> * x / || x ||^2, r_perp = r - r_para
85+
*
86+
* Compared to L2 loss, this produces an x_q which better approximates
87+
* the dot product of a query vector with x
88+
*
8189
* If the threshold is NAN, AVQ is not performed during bfloat16 quant
8290
*/
8391
float reordering_noise_shaping_threshold = NAN;
84-
// TODO - add other scann build params
8592
};
8693

8794
/**
@@ -141,7 +148,7 @@ struct index : cuvs::neighbors::index {
141148
IdxT dim,
142149
uint32_t pq_clusters,
143150
uint32_t pq_num_subspaces,
144-
bool reordering_bf16_enabled)
151+
bool reordering_bf16)
145152
: cuvs::neighbors::index(),
146153
metric_(metric),
147154
pq_dim_(pq_dim),
@@ -159,7 +166,7 @@ struct index : cuvs::neighbors::index {
159166
n_rows_(n_rows),
160167
dim_(dim),
161168
bf16_dataset_(raft::make_host_matrix<int16_t, IdxT, raft::row_major>(
162-
reordering_bf16_enabled ? n_rows : 0, reordering_bf16_enabled ? dim : 0))
169+
reordering_bf16 ? n_rows : 0, reordering_bf16 ? dim : 0))
163170

164171
{
165172
}
@@ -174,7 +181,7 @@ struct index : cuvs::neighbors::index {
174181
dim,
175182
1 << params.pq_bits,
176183
dim / params.pq_dim,
177-
params.reordering_bf16_enabled)
184+
params.reordering_bf16)
178185
{
179186
RAFT_EXPECTS(params.pq_bits == 4 || params.pq_bits == 8, "ScaNN only supports 4 or 8 bit PQ");
180187
RAFT_EXPECTS(dim >= params.pq_dim,

cpp/src/neighbors/scann/detail/scann_build.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ index<T, IdxT> build(
313313
// TODO (rmaschal): Might be more efficient to do on CPU, to avoid DtoH copy
314314
auto bf16_dataset = raft::make_device_matrix<int16_t, int64_t>(res, batch_view.extent(0), dim);
315315

316-
if (params.reordering_bf16_enabled) {
316+
if (params.reordering_bf16) {
317317
quantize_bfloat16(
318318
res, batch_view, bf16_dataset.view(), params.reordering_noise_shaping_threshold);
319319
}
@@ -333,7 +333,7 @@ index<T, IdxT> build(
333333
quantized_soar_residuals.size(),
334334
stream);
335335

336-
if (params.reordering_bf16_enabled) {
336+
if (params.reordering_bf16) {
337337
raft::copy(idx.bf16_dataset().data_handle() + batch.offset() * dim,
338338
bf16_dataset.data_handle(),
339339
bf16_dataset.size(),

cpp/src/neighbors/scann/detail/scann_quantize.cuh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,11 @@ __device__ inline float bfloat16_to_float(int16_t& bf16)
314314
* Based on the signs of the current residual and quantized value,
315315
* increment or decrement the quantized value to push residual closer to 0
316316
*
317+
* Note that the bfloat16 value is encoded as an int16_t, and the
318+
* increment/decrement is applied to the encoded value. In terms of the float
319+
* representation, it is the mantissa that is being incremented/decremented,
320+
* which could carry over to the exponent
321+
*
317322
* @param res the float residual
318323
* @param current the current quantized dimension
319324
* @return the other possible quantized value
@@ -328,9 +333,6 @@ __device__ inline int16_t bfloat16_next_delta(float& res, int16_t& current)
328333
return current + 1;
329334
}
330335

331-
/**
332-
*
333-
*/
334336
template <uint32_t BlockSize, typename IdxT>
335337
__launch_bounds__(BlockSize) RAFT_KERNEL
336338
quantize_bfloat16_noise_shaped_kernel(raft::device_matrix_view<const float, IdxT> dataset,

cpp/tests/neighbors/ann_scann.cuh

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ class scann_test : public ::testing::TestWithParam<scann_inputs> {
131131
ASSERT_EQ(index.pq_codebook().extent(0), num_pq_clusters);
132132
ASSERT_EQ(index.pq_codebook().extent(1), ps.dim);
133133

134-
IdxT expected_bf16_size = ps.index_params.reordering_bf16_enabled ? ps.dim * ps.num_db_vecs : 0;
134+
IdxT expected_bf16_size = ps.index_params.reordering_bf16 ? ps.dim * ps.num_db_vecs : 0;
135135

136136
ASSERT_EQ(index.bf16_dataset().size(), expected_bf16_size);
137137
}
@@ -227,7 +227,16 @@ inline auto big_dims_all_pq_bits() -> test_cases_t
227227
inline auto bf16() -> test_cases_t
228228
{
229229
scann_inputs ts;
230-
ts.index_params.reordering_bf16_enabled = true;
230+
ts.index_params.reordering_bf16 = true;
231+
232+
return {ts};
233+
}
234+
235+
inline auto bf16_avq() -> test_cases_t
236+
{
237+
scann_inputs ts;
238+
ts.index_params.reordering_bf16 = true;
239+
ts.index_params.reordering_noise_shaping_threshold = 0.2;
231240

232241
return {ts};
233242
}

cpp/tests/neighbors/ann_scann/test_float_int64_t.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ TEST_BUILD_HOST_INPUT(f32_i64)
2525
TEST_BUILD_HOST_INPUT_OVERLAP(f32_i64);
2626

2727
INSTANTIATE(f32_i64,
28-
defaults() + small_dims_all_pq_bits() + big_dims_all_pq_bits() + bf16() + avq() +
29-
soar());
28+
defaults() + small_dims_all_pq_bits() + big_dims_all_pq_bits() + bf16() + bf16_avq() +
29+
avq() + soar());
3030

3131
} // namespace cuvs::neighbors::experimental::scann

0 commit comments

Comments
 (0)