diff --git a/c/src/neighbors/cagra.cpp b/c/src/neighbors/cagra.cpp index e66d49625f..49c14f671f 100644 --- a/c/src/neighbors/cagra.cpp +++ b/c/src/neighbors/cagra.cpp @@ -618,6 +618,8 @@ extern "C" cuvsError_t cuvsCagraExtend(cuvsResources_t res, if ((dataset.dtype.code == kDLFloat) && (dataset.dtype.bits == 32)) { _extend(res, *params, index, additional_dataset_tensor); + } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) { + _extend(res, *params, index, additional_dataset_tensor); } else if (dataset.dtype.code == kDLInt && dataset.dtype.bits == 8) { _extend(res, *params, index, additional_dataset_tensor); } else if (dataset.dtype.code == kDLUInt && dataset.dtype.bits == 8) { diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e80220bce0..1b3030460a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -437,6 +437,7 @@ if(NOT BUILD_CPU_ONLY) src/neighbors/cagra_build_int8.cu src/neighbors/cagra_build_uint8.cu src/neighbors/cagra_extend_float.cu + src/neighbors/cagra_extend_half.cu src/neighbors/cagra_extend_int8.cu src/neighbors/cagra_extend_uint8.cu src/neighbors/cagra_optimize.cu diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index eb63ef475b..c3b5d5eab3 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -1238,6 +1238,82 @@ void extend( new_dataset_buffer_view = std::nullopt, std::optional> new_graph_buffer_view = std::nullopt); +/** @brief Add new vectors to a CAGRA index + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * auto additional_dataset = raft::make_device_matrix<half, int64_t>(handle,add_size,dim); + * // set_additional_dataset(additional_dataset.view()); + * + * cagra::extend_params params; + * cagra::extend(handle, params, raft::make_const_mdspan(additional_dataset.view()), index); + * @endcode + * + * @param[in] handle raft resources + * @param[in] params extend params + * @param[in] additional_dataset additional dataset on device memory + * @param[in,out] idx CAGRA index + * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional + * part. The data will be copied from the current index in this function. The num rows must be the + * sum of the original and additional datasets, cols must be the dimension of the dataset, and the + * stride must be the same as the original index dataset. This view will be stored in the output + * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. + * This option is useful when users want to manage the memory space for the dataset themselves. + * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. + * The data will be copied from the current index in this function. The num rows must be the sum of + * the original and additional datasets and cols must be the graph degree. This view will be stored + * in the output index. It is the caller's responsibility to ensure that graph stays alive as long + * as the index. This option is useful when users want to manage the memory space for the graph + * themselves.
+ */ +void extend( + raft::resources const& handle, + const cagra::extend_params& params, + raft::device_matrix_view additional_dataset, + cuvs::neighbors::cagra::index& idx, + std::optional> + new_dataset_buffer_view = std::nullopt, + std::optional> new_graph_buffer_view = std::nullopt); + +/** @brief Add new vectors to a CAGRA index + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * auto additional_dataset = raft::make_host_matrix<half, int64_t>(handle,add_size,dim); + * // set_additional_dataset(additional_dataset.view()); + * + * cagra::extend_params params; + * cagra::extend(handle, params, raft::make_const_mdspan(additional_dataset.view()), index); + * @endcode + * + * @param[in] handle raft resources + * @param[in] params extend params + * @param[in] additional_dataset additional dataset on host memory + * @param[in,out] idx CAGRA index + * @param[out] new_dataset_buffer_view memory buffer view for the dataset including the additional + * part. The data will be copied from the current index in this function. The num rows must be the + * sum of the original and additional datasets, cols must be the dimension of the dataset, and the + * stride must be the same as the original index dataset. This view will be stored in the output + * index. It is the caller's responsibility to ensure that dataset stays alive as long as the index. + * This option is useful when users want to manage the memory space for the dataset themselves. + * @param[out] new_graph_buffer_view memory buffer view for the graph including the additional part. + * The data will be copied from the current index in this function. The num rows must be the sum of + * the original and additional datasets and cols must be the graph degree. This view will be stored + * in the output index. It is the caller's responsibility to ensure that graph stays alive as long + * as the index. This option is useful when users want to manage the memory space for the graph + * themselves.
+ */ +void extend( + raft::resources const& handle, + const cagra::extend_params& params, + raft::host_matrix_view additional_dataset, + cuvs::neighbors::cagra::index& idx, + std::optional> + new_dataset_buffer_view = std::nullopt, + std::optional> new_graph_buffer_view = std::nullopt); + /** @brief Add new vectors to a CAGRA index * * Usage example: diff --git a/cpp/src/neighbors/cagra_extend_half.cu b/cpp/src/neighbors/cagra_extend_half.cu new file mode 100644 index 0000000000..66c75b594a --- /dev/null +++ b/cpp/src/neighbors/cagra_extend_half.cu @@ -0,0 +1,36 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "cagra.cuh" +#include + +namespace cuvs::neighbors::cagra { + +#define RAFT_INST_CAGRA_EXTEND(T, IdxT) \ + void extend(raft::resources const& handle, \ + const cagra::extend_params& params, \ + raft::device_matrix_view additional_dataset, \ + cuvs::neighbors::cagra::index& idx, \ + std::optional> ndv, \ + std::optional> ngv) \ + { \ + cuvs::neighbors::cagra::extend(handle, additional_dataset, idx, params, ndv, ngv); \ + } \ + \ + void extend(raft::resources const& handle, \ + const cagra::extend_params& params, \ + raft::host_matrix_view additional_dataset, \ + cuvs::neighbors::cagra::index& idx, \ + std::optional> ndv, \ + std::optional> ngv) \ + { \ + cuvs::neighbors::cagra::extend(handle, additional_dataset, idx, params, ndv, ngv); \ + } + +RAFT_INST_CAGRA_EXTEND(half, uint32_t); + +#undef RAFT_INST_CAGRA_EXTEND + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu index 2c85646189..2a6ead2e56 100644 --- a/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu +++ b/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -13,11 +13,17 @@ typedef AnnCagraTest AnnCagraTestF16_U32; TEST_P(AnnCagraTestF16_U32, AnnCagra_U32) { this->testCagra(); } TEST_P(AnnCagraTestF16_U32, AnnCagra_I64) { this->testCagra(); } +typedef AnnCagraAddNodesTest AnnCagraAddNodesTestF16_U32; +TEST_P(AnnCagraAddNodesTestF16_U32, AnnCagraAddNodes) { this->testCagra(); } + typedef AnnCagraIndexMergeTest AnnCagraIndexMergeTestF16_U32; TEST_P(AnnCagraIndexMergeTestF16_U32, AnnCagraIndexMerge_U32) { this->testCagra(); } TEST_P(AnnCagraIndexMergeTestF16_U32, AnnCagraIndexMerge_I64) { this->testCagra(); } INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF16_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest, + AnnCagraAddNodesTestF16_U32, + ::testing::ValuesIn(inputs_addnode)); INSTANTIATE_TEST_CASE_P(AnnCagraIndexMergeTest, AnnCagraIndexMergeTestF16_U32, ::testing::ValuesIn(inputs)); diff --git a/python/cuvs/cuvs/tests/test_cagra.py b/python/cuvs/cuvs/tests/test_cagra.py index 3468a086b9..c0d436951e 100644 --- a/python/cuvs/cuvs/tests/test_cagra.py +++ b/python/cuvs/cuvs/tests/test_cagra.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 # @@ -182,6 +182,7 @@ def test_filtered_cagra(sparsity): "params", [ { + "dtype": np.int8, "intermediate_graph_degree": 64, "graph_degree": 32, "test_extend": False, @@ -190,6 +191,7 @@ def test_filtered_cagra(sparsity): "build_algo": "ivf_pq", }, { + "dtype": np.float32, "intermediate_graph_degree": 32, "graph_degree": 16, "test_extend": True, @@ -198,6 +200,7 @@ def test_filtered_cagra(sparsity): "build_algo": "ivf_pq", }, { + "dtype": np.float32, "intermediate_graph_degree": 128, "graph_degree": 32, "test_extend": False, @@ -206,6 +209,7 @@ def test_filtered_cagra(sparsity): "build_algo": "nn_descent", }, { + "dtype": np.float16, "intermediate_graph_degree": 64, "graph_degree": 32, "test_extend": True, @@ -219,6 +223,7 @@ def test_cagra_index_params(params): # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only sqeuclidean metric here. run_cagra_build_search_test( + dtype=params["dtype"], test_extend=params["test_extend"], k=params["k"], metric=params["metric"],