Skip to content

Commit 641227a

Browse files
committed
Prevent nested parallelism in HNSW bench
- Setting the gbench number of threads and the HNSWlib config number of threads can lead to nested parallelism. Force either throughput mode using multiple gbench threads or latency mode using batch parallelism. - Added a check in the `search` method to handle a single-query batch size efficiently. There is a significant overhead in going through the thread pool.
1 parent cede915 commit 641227a

1 file changed

Lines changed: 12 additions & 3 deletions

File tree

cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55
#pragma once
@@ -169,8 +169,12 @@ void hnsw_lib<T>::set_search_param(const search_param_base& param_, const void*
169169
auto param = dynamic_cast<const search_param&>(param_);
170170
appr_alg_->ef_ = param.ef;
171171
num_threads_ = param.num_threads;
172-
// bench_mode_ = param.metric_objective;
173-
bench_mode_ = Mode::kLatency; // TODO(achirkin): pass the benchmark mode in the algo parameters
172+
if (cuvs::bench::benchmark_n_threads > 1) {
173+
bench_mode_ = Mode::kThroughput;
174+
num_threads_ = 1; // Prevent nested parallelism (gbench threads + batch threads).
175+
} else {
176+
bench_mode_ = Mode::kLatency;
177+
}
174178

175179
// Create a pool if multiple query threads have been set and the pool hasn't been created already
176180
bool create_pool = (bench_mode_ == Mode::kLatency && num_threads_ > 1 && !thread_pool_);
@@ -181,6 +185,11 @@ template <typename T>
181185
void hnsw_lib<T>::search(
182186
const T* query, int batch_size, int k, algo_base::index_type* indices, float* distances) const
183187
{
188+
if (batch_size == 1) {
189+
get_search_knn_results(query, k, indices, distances);
190+
return;
191+
}
192+
184193
auto f = [&](int i) {
185194
// hnsw can only handle a single vector at a time.
186195
get_search_knn_results(query + i * dim_, k, indices + i * k, distances + i * k);

0 commit comments

Comments
 (0)