Allow compilation when OpenMP is disabled (#1346)

robertmaynard · web-flow · commit bd9458661ad1 · 2025-10-01T18:46:35.000Z
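Since we compile cuVS with warnings as errors, a build with OpenMP disabled fails: the compiler no longer recognizes our `#pragma omp` directives and reports them as unknown pragmas. When `DISABLE_OPENMP` is set, pass `-Wno-unknown-pragmas` to the host compiler (directly for C++ and via `-Xcompiler` for CUDA), and mark the thread-count variables that are referenced only inside OpenMP pragmas as `[[maybe_unused]]` so they do not trigger unused-variable warnings. Authors: - Robert Maynard (https://github.com/robertmaynard) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: #1346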
diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -19,6 +19,11 @@ if(DISABLE_DEPRECATION_WARNINGS)
   )
 endif()
 
+if(DISABLE_OPENMP)
+  list(APPEND CUVS_CXX_FLAGS -Wno-unknown-pragmas)
+  list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wno-unknown-pragmas)
+endif()
+
 # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with
 # clang)
 if(CMAKE_COMPILER_IS_GNUCXX)
diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp
@@ -193,7 +193,8 @@ std::enable_if_t<hierarchy == HnswHierarchy::CPU, std::unique_ptr<index<T>>> fro
     cagra_index.graph().extent(1) / 2,
     params.ef_construction);
   appr_algo->base_layer_init = false;  // tell hnswlib to build upper layers only
-  auto num_threads           = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads;
+  [[maybe_unused]] auto num_threads =
+    params.num_threads == 0 ? omp_get_max_threads() : params.num_threads;
 #pragma omp parallel for num_threads(num_threads)
   for (int64_t i = 0; i < host_dataset_view.extent(0); i++) {
     appr_algo->addPoint((void*)(host_dataset_view.data_handle() + i * host_dataset_view.extent(1)),
@@ -544,7 +545,8 @@ void extend(raft::resources const& res,
     const_cast<void*>(idx.get_index()));
   auto current_element_count = hnswlib_index->getCurrentElementCount();
   auto new_element_count     = additional_dataset.extent(0);
-  auto num_threads           = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads;
+  [[maybe_unused]] auto num_threads =
+    params.num_threads == 0 ? omp_get_max_threads() : params.num_threads;
 
   hnswlib_index->resizeIndex(current_element_count + new_element_count);
 #pragma omp parallel for num_threads(num_threads)
diff --git a/cpp/src/neighbors/refine/refine_host.hpp b/cpp/src/neighbors/refine/refine_host.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -370,10 +370,12 @@ template <typename DC, typename IdxT, typename DataT, typename DistanceT, typena
     // taking this into account.
     auto n_elements    = std::max(size_t(512), dim);
     auto max_n_threads = raft::div_rounding_up_safe<size_t>(n_queries * orig_k * dim, n_elements);
-    auto suggested_n_threads_for_distance = std::min(size_t(suggested_n_threads), max_n_threads);
+    [[maybe_unused]] auto suggested_n_threads_for_distance =
+      std::min(size_t(suggested_n_threads), max_n_threads);
 
     // The max number of threads for topk computation is the number of queries.
-    auto suggested_n_threads_for_topk = std::min(size_t(suggested_n_threads), n_queries);
+    [[maybe_unused]] auto suggested_n_threads_for_topk =
+      std::min(size_t(suggested_n_threads), n_queries);
 
     // Compute the refined distance using original dataset vectors
 #pragma omp parallel for collapse(2) num_threads(suggested_n_threads_for_distance)