Prevent nested parallelism in HNSW bench (rapidsai#1895)

julianmi · jrbourbeau · commit 868a35774ee2 · 2026-03-25T20:46:18.000Z
Setting the gbench number of threads and the HNSWlib config number of threads can lead to nested parallelism. This patch proposes to either use throughput mode using multiple gbench threads or latency mode using batch parallelism. Additionally, there is a significant overhead in going through the thread pool. It is skipped in the `search` method to handle single query batch size efficiently. Authors: - Julian Miller (https://github.com/julianmi) Approvers: - Artem M. Chirkin (https://github.com/achirkin) URL: rapidsai#1895
diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #pragma once
@@ -169,8 +169,12 @@ void hnsw_lib<T>::set_search_param(const search_param_base& param_, const void*
   auto param     = dynamic_cast<const search_param&>(param_);
   appr_alg_->ef_ = param.ef;
   num_threads_   = param.num_threads;
-  // bench_mode_ = param.metric_objective;
-  bench_mode_ = Mode::kLatency;  // TODO(achirkin): pass the benchmark mode in the algo parameters
+  if (cuvs::bench::benchmark_n_threads > 1) {
+    bench_mode_  = Mode::kThroughput;
+    num_threads_ = 1;  // Prevent nested parallelism (gbench threads + batch threads).
+  } else {
+    bench_mode_ = Mode::kLatency;
+  }
 
   // Create a pool if multiple query threads have been set and the pool hasn't been created already
   bool create_pool = (bench_mode_ == Mode::kLatency && num_threads_ > 1 && !thread_pool_);
@@ -181,6 +185,11 @@ template <typename T>
 void hnsw_lib<T>::search(
   const T* query, int batch_size, int k, algo_base::index_type* indices, float* distances) const
 {
+  if (batch_size == 1) {
+    get_search_knn_results(query, k, indices, distances);
+    return;
+  }
+
   auto f = [&](int i) {
     // hnsw can only handle a single vector at a time.
     get_search_knn_results(query + i * dim_, k, indices + i * k, distances + i * k);