Skip to content

Commit 9fc1cfd

Browse files
authored
Add HNSW ACE build method (#1597)
This PR adds a direct `hnsw::build` API that uses the ACE (Augmented Core Extraction) algorithm to build HNSW indexes on the GPU. ACE enables building HNSW indexes for datasets too large to fit in GPU memory by partitioning the data and building sub-indexes. CC @tfeher

**C++ API**
- Added `hnsw::build()` function with ACE parameters for direct HNSW index construction. This serializes an HNSW index to disk if `use_disk` is true.
- Added `hnsw::graph_build_params::ace_params` struct with configurable options:
  - `npartitions` - number of partitions for parallel build
  - `ef_construction` - index quality parameter
  - `build_dir` - directory for disk-based build artifacts
  - `use_disk` - force disk-based storage mode
- Implemented proper serialization/deserialization for disk-backed HNSW indexes
- Added C++ tests in `ann_hnsw_ace.cuh`

**C API**
- Added `cuvsHnswBuild` function with ACE parameters
- Added C tests in `ann_hnsw_ace.cu`

**Python**
- Added `hnsw.AceParams` class for configuring ACE builds
- Added Python tests in `test_hnsw_ace.py`

**Java**
- Added `HnswAceParams` class
- Added Java tests in `HnswAceBuildAndSearchIT.java`

**Documentation**
- Added `cuvs_hnsw` section to the parameter tuning guide with ACE parameters

**Example**
- Added `hnsw_ace_example.cu` demonstrating the build → deserialize → search workflow

Authors:
- Julian Miller (https://github.com/julianmi)
- Tamas Bela Feher (https://github.com/tfeher)

Approvers:
- Tamas Bela Feher (https://github.com/tfeher)
- Robert Maynard (https://github.com/robertmaynard)
- Divye Gala (https://github.com/divyegala)
- MithunR (https://github.com/mythrocks)

URL: #1597
1 parent 2987eb2 commit 9fc1cfd

38 files changed

Lines changed: 2663 additions & 442 deletions

c/include/cuvs/neighbors/cagra.h

Lines changed: 3 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -127,10 +127,10 @@ typedef struct cuvsIvfPqParams* cuvsIvfPqParams_t;
127127

128128
/**
129129
* Parameters for ACE (Augmented Core Extraction) graph build.
130-
* ACE enables building indices for datasets too large to fit in GPU memory by:
130+
* ACE enables building indexes for datasets too large to fit in GPU memory by:
131131
* 1. Partitioning the dataset into core (closest) and augmented (second-closest)
132132
* partitions using balanced k-means.
133-
* 2. Building sub-indices for each partition independently
133+
* 2. Building sub-indexes for each partition independently
134134
* 3. Concatenating sub-graphs into a final unified index
135135
*/
136136
struct cuvsAceParams {
@@ -251,22 +251,6 @@ cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params);
251251
*/
252252
cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params);
253253

254-
/**
255-
* @brief Allocate ACE params, and populate with default values
256-
*
257-
* @param[in] params cuvsAceParams_t to allocate
258-
* @return cuvsError_t
259-
*/
260-
cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params);
261-
262-
/**
263-
* @brief De-allocate ACE params
264-
*
265-
* @param[in] params
266-
* @return cuvsError_t
267-
*/
268-
cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params);
269-
270254
/**
271255
* @brief Create CAGRA index parameters similar to an HNSW index
272256
*

c/include/cuvs/neighbors/hnsw.h

Lines changed: 127 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -36,6 +36,50 @@ enum cuvsHnswHierarchy {
3636
GPU = 2
3737
};
3838

39+
/**
40+
* Parameters for ACE (Augmented Core Extraction) graph build for HNSW.
41+
* ACE enables building indexes for datasets too large to fit in GPU memory by:
42+
* 1. Partitioning the dataset into core and augmented partitions using balanced k-means
43+
* 2. Building sub-indexes for each partition independently
44+
* 3. Concatenating sub-graphs into a final unified index
45+
*/
46+
struct cuvsHnswAceParams {
47+
/**
48+
* Number of partitions for ACE partitioned build.
49+
* Small values might improve recall but potentially degrade performance and
50+
* increase memory usage. 100k - 5M vectors per partition is recommended.
51+
*/
52+
size_t npartitions;
53+
/**
54+
* Directory to store ACE build artifacts (e.g., KNN graph, optimized graph).
55+
* Used when `use_disk` is true or when the graph does not fit in memory.
56+
*/
57+
const char* build_dir;
58+
/**
59+
* Whether to use disk-based storage for ACE build.
60+
* When true, enables disk-based operations for memory-efficient graph construction.
61+
*/
62+
bool use_disk;
63+
};
64+
65+
typedef struct cuvsHnswAceParams* cuvsHnswAceParams_t;
66+
67+
/**
68+
* @brief Allocate HNSW ACE params, and populate with default values
69+
*
70+
* @param[out] params pointer that receives the newly allocated cuvsHnswAceParams_t
71+
* @return cuvsError_t
72+
*/
73+
cuvsError_t cuvsHnswAceParamsCreate(cuvsHnswAceParams_t* params);
74+
75+
/**
76+
* @brief De-allocate HNSW ACE params
77+
*
78+
* @param[in] params cuvsHnswAceParams_t to de-allocate
79+
* @return cuvsError_t
80+
*/
81+
cuvsError_t cuvsHnswAceParamsDestroy(cuvsHnswAceParams_t params);
82+
3983
struct cuvsHnswIndexParams {
4084
/* hierarchy of the hnsw index */
4185
enum cuvsHnswHierarchy hierarchy;
@@ -49,6 +93,17 @@ struct cuvsHnswIndexParams {
4993
is parallelized with the help of CPU threads.
5094
*/
5195
int num_threads;
96+
/** HNSW M parameter: number of bi-directional links per node (used when building with ACE).
97+
* graph_degree = M * 2, intermediate_graph_degree = M * 3.
98+
*/
99+
size_t M;
100+
/** Distance type for the index. */
101+
cuvsDistanceType metric;
102+
/**
103+
* Optional: specify ACE parameters for building the HNSW index using the ACE algorithm.
104+
* Set to NULL for default behavior (from_cagra conversion); cuvsHnswBuild requires this to be non-NULL.
105+
*/
106+
cuvsHnswAceParams_t ace_params;
52107
};
53108

54109
typedef struct cuvsHnswIndexParams* cuvsHnswIndexParams_t;
@@ -203,6 +258,77 @@ cuvsError_t cuvsHnswFromCagraWithDataset(cuvsResources_t res,
203258
* @}
204259
*/
205260

261+
/**
262+
* @defgroup hnsw_c_index_build Build HNSW index using ACE algorithm
263+
* @{
264+
*/
265+
266+
/**
267+
* @brief Build an HNSW index using the ACE (Augmented Core Extraction) algorithm.
268+
*
269+
* ACE enables building HNSW indexes for datasets too large to fit in GPU memory by:
270+
* 1. Partitioning the dataset using balanced k-means into core and augmented partitions
271+
* 2. Building sub-indexes for each partition independently
272+
* 3. Concatenating sub-graphs into a final unified index
273+
*
274+
* NOTE: This function requires CUDA to be available at runtime.
275+
*
276+
* @param[in] res cuvsResources_t opaque C handle
277+
* @param[in] params cuvsHnswIndexParams_t with ACE parameters configured
278+
* @param[in] dataset DLManagedTensor* host dataset to build index from
279+
* @param[out] index cuvsHnswIndex_t to return the built HNSW index
280+
*
281+
* @return cuvsError_t
282+
*
283+
* @code{.c}
284+
* #include <cuvs/core/c_api.h>
285+
* #include <cuvs/neighbors/hnsw.h>
286+
*
287+
* // Create cuvsResources_t
288+
* cuvsResources_t res;
289+
* cuvsResourcesCreate(&res);
290+
*
291+
* // Create ACE parameters
292+
* cuvsHnswAceParams_t ace_params;
293+
* cuvsHnswAceParamsCreate(&ace_params);
294+
* ace_params->npartitions = 4;
295+
* ace_params->use_disk = true;
296+
* ace_params->build_dir = "/tmp/hnsw_ace_build";
297+
*
298+
* // Create index parameters
299+
* cuvsHnswIndexParams_t params;
300+
* cuvsHnswIndexParamsCreate(&params);
301+
* params->hierarchy = GPU;
302+
* params->ace_params = ace_params;
303+
* params->M = 32;
304+
* params->ef_construction = 120;
305+
*
306+
* // Create HNSW index
307+
* cuvsHnswIndex_t hnsw_index;
308+
* cuvsHnswIndexCreate(&hnsw_index);
309+
*
310+
* // Assume dataset is a populated DLManagedTensor with host data
311+
* DLManagedTensor dataset;
312+
*
313+
* // Build the index
314+
* cuvsHnswBuild(res, params, &dataset, hnsw_index);
315+
*
316+
* // Clean up
317+
* cuvsHnswAceParamsDestroy(ace_params);
318+
* cuvsHnswIndexParamsDestroy(params);
319+
* cuvsHnswIndexDestroy(hnsw_index);
320+
* cuvsResourcesDestroy(res);
321+
* @endcode
322+
*/
323+
cuvsError_t cuvsHnswBuild(cuvsResources_t res,
324+
cuvsHnswIndexParams_t params,
325+
DLManagedTensor* dataset,
326+
cuvsHnswIndex_t index);
327+
328+
/**
329+
* @}
330+
*/
331+
206332
/**
207333
* @defgroup hnsw_c_index_extend Extend HNSW index with additional vectors
208334
* @{

c/src/neighbors/hnsw.cpp

Lines changed: 74 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11

22
/*
3-
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
3+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
44
* SPDX-License-Identifier: Apache-2.0
55
*/
66

@@ -23,6 +23,36 @@
2323

2424
namespace {
2525

26+
template <typename T>
27+
void _build(cuvsResources_t res,
28+
cuvsHnswIndexParams_t params,
29+
DLManagedTensor* dataset_tensor,
30+
cuvsHnswIndex_t hnsw_index)
31+
{
32+
auto res_ptr = reinterpret_cast<raft::resources*>(res);
33+
auto cpp_params = cuvs::neighbors::hnsw::index_params();
34+
cpp_params.hierarchy = static_cast<cuvs::neighbors::hnsw::HnswHierarchy>(params->hierarchy);
35+
cpp_params.ef_construction = params->ef_construction;
36+
cpp_params.num_threads = params->num_threads;
37+
cpp_params.M = params->M;
38+
cpp_params.metric = static_cast<cuvs::distance::DistanceType>(params->metric);
39+
40+
// Configure ACE parameters
41+
RAFT_EXPECTS(params->ace_params != nullptr, "ACE parameters must be set for hnsw::build");
42+
auto ace_params = cuvs::neighbors::hnsw::graph_build_params::ace_params();
43+
ace_params.npartitions = params->ace_params->npartitions;
44+
ace_params.build_dir = params->ace_params->build_dir ? params->ace_params->build_dir : "/tmp/hnsw_ace_build";
45+
ace_params.use_disk = params->ace_params->use_disk;
46+
cpp_params.graph_build_params = ace_params;
47+
48+
using dataset_mdspan_type = raft::host_matrix_view<T const, int64_t, raft::row_major>;
49+
auto dataset_mds = cuvs::core::from_dlpack<dataset_mdspan_type>(dataset_tensor);
50+
51+
auto hnsw_index_unique_ptr = cuvs::neighbors::hnsw::build(*res_ptr, cpp_params, dataset_mds);
52+
auto hnsw_index_ptr = hnsw_index_unique_ptr.release();
53+
hnsw_index->addr = reinterpret_cast<uintptr_t>(hnsw_index_ptr);
54+
}
55+
2656
template <typename T>
2757
void _from_cagra(cuvsResources_t res,
2858
cuvsHnswIndexParams_t params,
@@ -118,11 +148,29 @@ void* _deserialize(cuvsResources_t res,
118148
}
119149
} // namespace
120150

151+
extern "C" cuvsError_t cuvsHnswAceParamsCreate(cuvsHnswAceParams_t* params)
152+
{
153+
return cuvs::core::translate_exceptions([=] {
154+
*params = new cuvsHnswAceParams{.npartitions = 1,
155+
.build_dir = "/tmp/hnsw_ace_build",
156+
.use_disk = false};
157+
});
158+
}
159+
160+
extern "C" cuvsError_t cuvsHnswAceParamsDestroy(cuvsHnswAceParams_t params)
161+
{
162+
return cuvs::core::translate_exceptions([=] { delete params; });
163+
}
164+
121165
extern "C" cuvsError_t cuvsHnswIndexParamsCreate(cuvsHnswIndexParams_t* params)
122166
{
123167
return cuvs::core::translate_exceptions([=] {
124-
*params = new cuvsHnswIndexParams{
125-
.hierarchy = cuvsHnswHierarchy::NONE, .ef_construction = 200, .num_threads = 0};
168+
*params = new cuvsHnswIndexParams{.hierarchy = cuvsHnswHierarchy::NONE,
169+
.ef_construction = 200,
170+
.num_threads = 0,
171+
.M = 32,
172+
.metric = L2Expanded,
173+
.ace_params = nullptr};
126174
});
127175
}
128176

@@ -213,6 +261,29 @@ extern "C" cuvsError_t cuvsHnswFromCagraWithDataset(cuvsResources_t res,
213261
});
214262
}
215263

264+
extern "C" cuvsError_t cuvsHnswBuild(cuvsResources_t res,
265+
cuvsHnswIndexParams_t params,
266+
DLManagedTensor* dataset,
267+
cuvsHnswIndex_t index)
268+
{
269+
return cuvs::core::translate_exceptions([=] {
270+
auto dataset_dl = dataset->dl_tensor;
271+
index->dtype = dataset_dl.dtype;
272+
273+
if (dataset_dl.dtype.code == kDLFloat && dataset_dl.dtype.bits == 32) {
274+
_build<float>(res, params, dataset, index);
275+
} else if (dataset_dl.dtype.code == kDLFloat && dataset_dl.dtype.bits == 16) {
276+
_build<half>(res, params, dataset, index);
277+
} else if (dataset_dl.dtype.code == kDLUInt && dataset_dl.dtype.bits == 8) {
278+
_build<uint8_t>(res, params, dataset, index);
279+
} else if (dataset_dl.dtype.code == kDLInt && dataset_dl.dtype.bits == 8) {
280+
_build<int8_t>(res, params, dataset, index);
281+
} else {
282+
RAFT_FAIL("Unsupported dtype: code=%d, bits=%d", dataset_dl.dtype.code, dataset_dl.dtype.bits);
283+
}
284+
});
285+
}
286+
216287
extern "C" cuvsError_t cuvsHnswExtend(cuvsResources_t res,
217288
cuvsHnswExtendParams_t params,
218289
DLManagedTensor* additional_dataset,

0 commit comments

Comments
 (0)