Skip to content

Commit d02e0fc

Browse files
authored
MG Python API (#1307)
Authors: - Victor Lafargue (https://github.com/viclafargue) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Ben Frederickson (https://github.com/benfred) URL: #1307
1 parent cb2fafd commit d02e0fc

36 files changed

Lines changed: 4799 additions & 76 deletions

cpp/include/cuvs/core/c_api.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -75,23 +75,6 @@ cuvsError_t cuvsResourcesCreate(cuvsResources_t* res);
7575
*/
7676
cuvsError_t cuvsResourcesDestroy(cuvsResources_t res);
7777

78-
/**
79-
* @brief Create an Initialized opaque C handle for C++ type `raft::device_resources_snmg`
80-
* for multi-GPU operations
81-
*
82-
* @param[in] res cuvsResources_t opaque C handle
83-
* @return cuvsError_t
84-
*/
85-
cuvsError_t cuvsMultiGpuResourcesCreate(cuvsResources_t* res);
86-
87-
/**
88-
* @brief Destroy and de-allocate opaque C handle for C++ type `raft::device_resources_snmg`
89-
*
90-
* @param[in] res cuvsResources_t opaque C handle
91-
* @return cuvsError_t
92-
*/
93-
cuvsError_t cuvsMultiGpuResourcesDestroy(cuvsResources_t res);
94-
9578
/**
9679
* @brief Set cudaStream_t on cuvsResources_t to queue CUDA kernels on APIs
9780
* that accept a cuvsResources_t handle

cpp/scripts/gitutils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -155,7 +155,7 @@ def uncommittedFiles():
155155
ret = []
156156
for f in files.splitlines():
157157
f = f.strip(" ")
158-
f = re.sub("\s+", " ", f) # noqa: W605
158+
f = re.sub(r"\s+", " ", f) # noqa: W605
159159
tmp = f.split(" ", 1)
160160
# only consider staged files or uncommitted files
161161
# in other words, ignore untracked files

cpp/src/neighbors/mg_cagra_c.cpp

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,12 @@ extern "C" cuvsError_t cuvsMultiGpuCagraBuild(cuvsResources_t res,
267267
cuvsMultiGpuCagraIndex_t index)
268268
{
269269
return cuvs::core::translate_exceptions([=] {
270-
auto dataset = dataset_tensor->dl_tensor;
270+
auto dataset = dataset_tensor->dl_tensor;
271+
272+
// Multi-GPU CAGRA requires dataset to be in host memory
273+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(dataset),
274+
"Multi-GPU CAGRA build requires dataset to have host compatible memory");
275+
271276
index->dtype.code = dataset.dtype.code;
272277
index->dtype.bits = dataset.dtype.bits;
273278

@@ -295,7 +300,29 @@ extern "C" cuvsError_t cuvsMultiGpuCagraSearch(cuvsResources_t res,
295300
DLManagedTensor* distances_tensor)
296301
{
297302
return cuvs::core::translate_exceptions([=] {
298-
auto queries = queries_tensor->dl_tensor;
303+
auto queries = queries_tensor->dl_tensor;
304+
auto neighbors = neighbors_tensor->dl_tensor;
305+
auto distances = distances_tensor->dl_tensor;
306+
307+
// Multi-GPU CAGRA requires all tensors to be in host memory
308+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(queries),
309+
"Multi-GPU CAGRA search requires queries to have host compatible memory");
310+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(neighbors),
311+
"Multi-GPU CAGRA search requires neighbors to have host compatible memory");
312+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(distances),
313+
"Multi-GPU CAGRA search requires distances to have host compatible memory");
314+
315+
// Validate data types
316+
RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64,
317+
"neighbors should be of type int64_t");
318+
RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32,
319+
"distances should be of type float32");
320+
321+
// Check type compatibility between index and queries
322+
RAFT_EXPECTS(queries.dtype.code == index->dtype.code,
323+
"type mismatch between index and queries");
324+
RAFT_EXPECTS(queries.dtype.bits == index->dtype.bits,
325+
"type mismatch between index and queries");
299326

300327
if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
301328
_mg_search<float>(res, *params, *index, queries_tensor, neighbors_tensor, distances_tensor);
@@ -321,6 +348,25 @@ extern "C" cuvsError_t cuvsMultiGpuCagraExtend(cuvsResources_t res,
321348
return cuvs::core::translate_exceptions([=] {
322349
auto vectors = new_vectors_tensor->dl_tensor;
323350

351+
// Multi-GPU CAGRA requires vectors to be in host memory
352+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(vectors),
353+
"Multi-GPU CAGRA extend requires new_vectors to have host compatible memory");
354+
355+
// Check type compatibility between index and vectors
356+
RAFT_EXPECTS(vectors.dtype.code == index->dtype.code,
357+
"type mismatch between index and new_vectors");
358+
RAFT_EXPECTS(vectors.dtype.bits == index->dtype.bits,
359+
"type mismatch between index and new_vectors");
360+
361+
// If indices are provided, they should also be in host memory
362+
if (new_indices_tensor != nullptr) {
363+
auto indices = new_indices_tensor->dl_tensor;
364+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(indices),
365+
"Multi-GPU CAGRA extend requires new_indices to have host compatible memory");
366+
RAFT_EXPECTS(indices.dtype.code == kDLUInt && indices.dtype.bits == 32,
367+
"new_indices should be of type uint32_t");
368+
}
369+
324370
if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) {
325371
_mg_extend<float>(res, *index, new_vectors_tensor, new_indices_tensor);
326372
} else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) {

cpp/src/neighbors/mg_ivf_flat_c.cpp

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,12 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatBuild(cuvsResources_t res,
264264
cuvsMultiGpuIvfFlatIndex_t index)
265265
{
266266
return cuvs::core::translate_exceptions([=] {
267-
auto dataset = dataset_tensor->dl_tensor;
267+
auto dataset = dataset_tensor->dl_tensor;
268+
269+
// Multi-GPU IVF-Flat requires dataset to be in host memory
270+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(dataset),
271+
"Multi-GPU IVF-Flat build requires dataset to have host compatible memory");
272+
268273
index->dtype.code = dataset.dtype.code;
269274
index->dtype.bits = dataset.dtype.bits;
270275

@@ -292,7 +297,29 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatSearch(cuvsResources_t res,
292297
DLManagedTensor* distances_tensor)
293298
{
294299
return cuvs::core::translate_exceptions([=] {
295-
auto queries = queries_tensor->dl_tensor;
300+
auto queries = queries_tensor->dl_tensor;
301+
auto neighbors = neighbors_tensor->dl_tensor;
302+
auto distances = distances_tensor->dl_tensor;
303+
304+
// Multi-GPU IVF-Flat requires all tensors to be in host memory
305+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(queries),
306+
"Multi-GPU IVF-Flat search requires queries to have host compatible memory");
307+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(neighbors),
308+
"Multi-GPU IVF-Flat search requires neighbors to have host compatible memory");
309+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(distances),
310+
"Multi-GPU IVF-Flat search requires distances to have host compatible memory");
311+
312+
// Validate data types
313+
RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64,
314+
"neighbors should be of type int64_t");
315+
RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32,
316+
"distances should be of type float32");
317+
318+
// Check type compatibility between index and queries
319+
RAFT_EXPECTS(queries.dtype.code == index->dtype.code,
320+
"type mismatch between index and queries");
321+
RAFT_EXPECTS(queries.dtype.bits == index->dtype.bits,
322+
"type mismatch between index and queries");
296323

297324
if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
298325
_mg_search<float>(res, *params, *index, queries_tensor, neighbors_tensor, distances_tensor);
@@ -318,6 +345,25 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatExtend(cuvsResources_t res,
318345
return cuvs::core::translate_exceptions([=] {
319346
auto vectors = new_vectors_tensor->dl_tensor;
320347

348+
// Multi-GPU IVF-Flat requires vectors to be in host memory
349+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(vectors),
350+
"Multi-GPU IVF-Flat extend requires new_vectors to have host compatible memory");
351+
352+
// Check type compatibility between index and vectors
353+
RAFT_EXPECTS(vectors.dtype.code == index->dtype.code,
354+
"type mismatch between index and new_vectors");
355+
RAFT_EXPECTS(vectors.dtype.bits == index->dtype.bits,
356+
"type mismatch between index and new_vectors");
357+
358+
// If indices are provided, they should also be in host memory
359+
if (new_indices_tensor != nullptr) {
360+
auto indices = new_indices_tensor->dl_tensor;
361+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(indices),
362+
"Multi-GPU IVF-Flat extend requires new_indices to have host compatible memory");
363+
RAFT_EXPECTS(indices.dtype.code == kDLInt && indices.dtype.bits == 64,
364+
"new_indices should be of type int64_t");
365+
}
366+
321367
if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) {
322368
_mg_extend<float>(res, *index, new_vectors_tensor, new_indices_tensor);
323369
} else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) {

cpp/src/neighbors/mg_ivf_pq_c.cpp

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,12 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqBuild(cuvsResources_t res,
256256
cuvsMultiGpuIvfPqIndex_t index)
257257
{
258258
return cuvs::core::translate_exceptions([=] {
259-
auto dataset = dataset_tensor->dl_tensor;
259+
auto dataset = dataset_tensor->dl_tensor;
260+
261+
// Multi-GPU IVF-PQ requires dataset to be in host memory
262+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(dataset),
263+
"Multi-GPU IVF-PQ build requires dataset to have host compatible memory");
264+
260265
index->dtype.code = dataset.dtype.code;
261266
index->dtype.bits = dataset.dtype.bits;
262267

@@ -284,7 +289,29 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqSearch(cuvsResources_t res,
284289
DLManagedTensor* distances_tensor)
285290
{
286291
return cuvs::core::translate_exceptions([=] {
287-
auto queries = queries_tensor->dl_tensor;
292+
auto queries = queries_tensor->dl_tensor;
293+
auto neighbors = neighbors_tensor->dl_tensor;
294+
auto distances = distances_tensor->dl_tensor;
295+
296+
// Multi-GPU IVF-PQ requires all tensors to be in host memory
297+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(queries),
298+
"Multi-GPU IVF-PQ search requires queries to have host compatible memory");
299+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(neighbors),
300+
"Multi-GPU IVF-PQ search requires neighbors to have host compatible memory");
301+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(distances),
302+
"Multi-GPU IVF-PQ search requires distances to have host compatible memory");
303+
304+
// Validate data types
305+
RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64,
306+
"neighbors should be of type int64_t");
307+
RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32,
308+
"distances should be of type float32");
309+
310+
// Check type compatibility between index and queries
311+
RAFT_EXPECTS(queries.dtype.code == index->dtype.code,
312+
"type mismatch between index and queries");
313+
RAFT_EXPECTS(queries.dtype.bits == index->dtype.bits,
314+
"type mismatch between index and queries");
288315

289316
if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
290317
_mg_search<float>(res, *params, *index, queries_tensor, neighbors_tensor, distances_tensor);
@@ -310,6 +337,25 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqExtend(cuvsResources_t res,
310337
return cuvs::core::translate_exceptions([=] {
311338
auto vectors = new_vectors_tensor->dl_tensor;
312339

340+
// Multi-GPU IVF-PQ requires vectors to be in host memory
341+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(vectors),
342+
"Multi-GPU IVF-PQ extend requires new_vectors to have host compatible memory");
343+
344+
// Check type compatibility between index and vectors
345+
RAFT_EXPECTS(vectors.dtype.code == index->dtype.code,
346+
"type mismatch between index and new_vectors");
347+
RAFT_EXPECTS(vectors.dtype.bits == index->dtype.bits,
348+
"type mismatch between index and new_vectors");
349+
350+
// If indices are provided, they should also be in host memory
351+
if (new_indices_tensor != nullptr) {
352+
auto indices = new_indices_tensor->dl_tensor;
353+
RAFT_EXPECTS(cuvs::core::is_dlpack_host_compatible(indices),
354+
"Multi-GPU IVF-PQ extend requires new_indices to have host compatible memory");
355+
RAFT_EXPECTS(indices.dtype.code == kDLInt && indices.dtype.bits == 64,
356+
"new_indices should be of type int64_t");
357+
}
358+
313359
if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) {
314360
_mg_extend<float>(res, *index, new_vectors_tensor, new_indices_tensor);
315361
} else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) {
@@ -381,28 +427,8 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqDistribute(cuvsResources_t res,
381427
cuvsMultiGpuIvfPqIndex_t index)
382428
{
383429
return cuvs::core::translate_exceptions([=] {
384-
std::ifstream is(filename, std::ios::in | std::ios::binary);
385-
if (!is) { RAFT_FAIL("Cannot open file %s", filename); }
386-
char dtype_string[4];
387-
is.read(dtype_string, 4);
388-
auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4));
389-
is.close();
390-
391-
index->dtype.bits = dtype.itemsize * 8;
392-
if (dtype.kind == 'f' && dtype.itemsize == 4) {
393-
index->dtype.code = kDLFloat;
394-
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<float>(res, filename));
395-
} else if (dtype.kind == 'f' && dtype.itemsize == 2) {
396-
index->dtype.code = kDLFloat;
397-
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<half>(res, filename));
398-
} else if (dtype.kind == 'i' && dtype.itemsize == 1) {
399-
index->dtype.code = kDLInt;
400-
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<int8_t>(res, filename));
401-
} else if (dtype.kind == 'u' && dtype.itemsize == 1) {
402-
index->dtype.code = kDLUInt;
403-
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<uint8_t>(res, filename));
404-
} else {
405-
RAFT_FAIL("Unsupported index dtype");
406-
}
430+
index->dtype.code = kDLFloat;
431+
index->dtype.bits = 32;
432+
index->addr = reinterpret_cast<uintptr_t>(_mg_distribute<float>(res, filename));
407433
});
408434
}

docs/source/python_api/neighbors.rst

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,26 @@ Nearest Neighbors
55
:language: python
66
:class: highlight
77

8+
Single-GPU Algorithms
9+
#####################
10+
811
.. toctree::
912
:maxdepth: 2
10-
:caption: Contents:
13+
:caption: Single-GPU ANN Algorithms:
1114

1215
neighbors_brute_force.rst
1316
neighbors_cagra.rst
1417
neighbors_hnsw.rst
1518
neighbors_ivf_flat.rst
1619
neighbors_ivf_pq.rst
1720
neighbors_nn_decent.rst
21+
22+
Multi-GPU Algorithms
23+
####################
24+
25+
.. toctree::
26+
:maxdepth: 2
27+
:caption: Multi-GPU Distributed ANN:
28+
29+
neighbors_multi_gpu.rst
1830
neighbors_all_neighbors.rst
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
Multi-GPU CAGRA
2+
===============
3+
4+
Multi-GPU CAGRA extends the graph-based CAGRA algorithm to work across multiple GPUs, providing improved scalability and performance for large-scale vector search. It supports both replicated and sharded distribution modes.
5+
6+
.. role:: py(code)
7+
:language: python
8+
:class: highlight
9+
10+
.. note::
11+
**IMPORTANT**: Multi-GPU CAGRA requires all data (datasets, queries, and output arrays) to reside in host (CPU) memory.
12+
If your data is held in CuPy or other device arrays, copy it to the host with ``array.get()`` or ``cp.asnumpy(array)`` before use.
13+
14+
Index build parameters
15+
######################
16+
17+
.. autoclass:: cuvs.neighbors.mg_cagra.IndexParams
18+
:members:
19+
20+
Index search parameters
21+
#######################
22+
23+
.. autoclass:: cuvs.neighbors.mg_cagra.SearchParams
24+
:members:
25+
26+
Index
27+
#####
28+
29+
.. autoclass:: cuvs.neighbors.mg_cagra.Index
30+
:members:
31+
32+
Index build
33+
###########
34+
35+
.. autofunction:: cuvs.neighbors.mg_cagra.build
36+
37+
Index search
38+
############
39+
40+
.. autofunction:: cuvs.neighbors.mg_cagra.search
41+
42+
Index save
43+
##########
44+
45+
.. autofunction:: cuvs.neighbors.mg_cagra.save
46+
47+
Index load
48+
##########
49+
50+
.. autofunction:: cuvs.neighbors.mg_cagra.load
51+
52+
Index distribute
53+
################
54+
55+
.. autofunction:: cuvs.neighbors.mg_cagra.distribute

0 commit comments

Comments
 (0)