Skip to content

Commit 409a918

Browse files
authored
Merge branch 'branch-25.06' into get-version-jni-c-api
2 parents 76f40b0 + 3f95d4a commit 409a918

18 files changed

Lines changed: 319 additions & 97 deletions

File tree

cpp/include/cuvs/neighbors/all_neighbors.hpp

Lines changed: 53 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
#include <variant>
2323

2424
namespace cuvs::neighbors::all_neighbors {
25+
/**
26+
* @defgroup all_neighbors_cpp_params The all-neighbors algorithm parameters.
27+
* @{
28+
*/
2529

2630
/**
2731
* @brief Parameters used to build an all-neighbors knn graph (find nearest neighbors for all the
@@ -43,20 +47,21 @@ using GraphBuildParams =
4347
std::variant<graph_build_params::ivf_pq_params, graph_build_params::nn_descent_params>;
4448

4549
/**
46-
* @brief Parameters used to build an all-neighbors graph (find nearest neighbors for all the
47-
* training vectors)
48-
*
49-
* graph_build_params: graph building parameters for the given graph building algorithm. defaults
50-
* to ivfpq.
51-
* n_nearest_clusters: number of nearest clusters each data point will be assigned to in
52-
* the batching algorithm
53-
* n_clusters: number of total clusters (aka batches) to split the data into. If set to 1, algorithm
54-
* creates an all-neighbors graph without batching
55-
* metric: metric type
50+
* @brief Parameters used to build an all-neighbors graph (find nearest neighbors for all the
51+
* training vectors).
52+
* For scalability, the all-neighbors graph construction algorithm partitions a set of training
53+
* vectors into overlapping clusters, computes a local knn graph on each cluster, and merges the
54+
* local graphs into a single global graph.
55+
* Device memory usage and accuracy can be configured by changing the `overlap_factor` and
56+
* `n_clusters`.
57+
* The algorithm used to build each local graph is also configurable.
5658
*
5759
*/
5860
struct all_neighbors_params {
5961
/** Parameters for knn graph building algorithm
62+
* Approximate nearest neighbors methods are used to build the knn graph. Currently supported
63+
* options are 'IVF-PQ' and 'NN Descent'. IVF-PQ is more accurate, but slower compared to NN
64+
* Descent.
6065
*
6166
* Set ivf_pq_params, or nn_descent_params to select the graph build
6267
* algorithm and control their parameters.
@@ -74,35 +79,55 @@ struct all_neighbors_params {
7479
GraphBuildParams graph_build_params;
7580

7681
/**
77-
* Usage of n_nearest_clusters and n_clusters
82+
* Number of nearest clusters each data point will be assigned to in the batching algorithm.
83+
* Start with `overlap_factor = 2` and gradually increase (2->3->4 ...) for better accuracy at the
84+
* cost of device memory usage.
85+
*/
86+
size_t overlap_factor = 2;
87+
88+
/**
89+
* Number of total clusters (aka batches) to split the data into. If set to 1, algorithm creates
90+
* an all-neighbors graph without batching.
91+
* Start with `n_clusters = 4` and increase (4 → 8 → 16...) for less device memory usage at the
92+
* cost of accuracy. This is independent from `overlap_factor` as long as `overlap_factor` <
93+
* `n_clusters`.
7894
*
79-
* The ratio of n_nearest_clusters / n_clusters determines device memory usage.
80-
* Approximately (n_nearest_clusters / n_clusters) * num_rows_in_entire_data number of rows will
95+
* The ratio of `overlap_factor / n_clusters` determines device memory usage.
96+
* Approximately `(overlap_factor / n_clusters) * num_rows_in_entire_data` number of rows will
8197
* be put on device memory at once.
82-
* E.g. between (n_nearest_clusters / n_clusters) = 2/10 and 2/20, the latter will use less device
98+
* E.g. between `(overlap_factor / n_clusters)` = 2/10 and 2/20, the latter will use less device
8399
* memory.
84100
*
85-
* Larger n_nearest_clusters results in better accuracy of the final all-neighbors knn
86-
* graph. E.g. With the similar device memory usages, (n_nearest_clusters / n_clusters) = 4/20
101+
* Larger `overlap_factor` results in better accuracy of the final all-neighbors knn
102+
* graph. E.g. While using similar device memory, `(overlap_factor / n_clusters)` = 4/20
87103
* will have better accuracy than 2/10 at the cost of performance.
104+
*
88105
*/
89-
size_t n_nearest_clusters = 2;
90-
size_t n_clusters = 1; // defaults to not batching
106+
size_t n_clusters = 1; // defaults to not batching
107+
108+
/** Metric used. */
91109
cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded;
92110
};
93111

112+
/** @} */
113+
114+
/**
115+
* @defgroup all_neighbors_cpp_build The all-neighbors knn graph build
116+
* @{
117+
*/
118+
94119
/**
95120
* @brief Builds an approximate all-neighbors knn graph (find nearest neighbors for all the
96121
* training vectors)
97122
*
98123
* Usage example:
99124
* @code{.cpp}
100-
* using namespace cuvs::neighbors;
101-
* // use default index parameters
102-
* all_neighbors::all_neighbors_params params;
125+
* using namespace cuvs::neighbors;
126+
* // use default index parameters
127+
* all_neighbors::all_neighbors_params params;
103128
* auto indices = raft::make_device_matrix<int64_t, int64_t>(handle, n_row, k);
104129
* auto distances = raft::make_device_matrix<float, int64_t>(handle, n_row, k);
105-
* all_neighbors::build(res, params, dataset, indices.view(), distances.view());
130+
* all_neighbors::build(res, params, dataset, indices.view(), distances.view());
106131
* @endcode
107132
*
108133
* @param[in] handle raft::resources is an object managing resources
@@ -127,12 +152,12 @@ void build(
127152
*
128153
* Usage example:
129154
* @code{.cpp}
130-
* using namespace cuvs::neighbors;
131-
* // use default index parameters
132-
* all_neighbors::all_neighbors_params params;
155+
* using namespace cuvs::neighbors;
156+
* // use default index parameters
157+
* all_neighbors::all_neighbors_params params;
133158
* auto indices = raft::make_device_matrix<int64_t, int64_t>(handle, n_row, k);
134159
* auto distances = raft::make_device_matrix<float, int64_t>(handle, n_row, k);
135-
* all_neighbors::build(res, params, dataset, indices.view(), distances.view());
160+
* all_neighbors::build(res, params, dataset, indices.view(), distances.view());
136161
* @endcode
137162
*
138163
* @param[in] handle raft::resources is an object managing resources
@@ -149,4 +174,6 @@ void build(
149174
raft::device_matrix_view<const float, int64_t, row_major> dataset,
150175
raft::device_matrix_view<int64_t, int64_t, row_major> indices,
151176
std::optional<raft::device_matrix_view<float, int64_t, row_major>> distances = std::nullopt);
177+
178+
/** @} */
152179
} // namespace cuvs::neighbors::all_neighbors

cpp/include/cuvs/neighbors/ivf_flat.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,25 @@ cuvsError_t cuvsIvfFlatIndexCreate(cuvsIvfFlatIndex_t* index);
164164
* @param[in] index cuvsIvfFlatIndex_t to de-allocate
165165
*/
166166
cuvsError_t cuvsIvfFlatIndexDestroy(cuvsIvfFlatIndex_t index);
167+
168+
/** Get the number of clusters/inverted lists */
169+
uint32_t cuvsIvfFlatIndexGetNLists(cuvsIvfFlatIndex_t index);
170+
171+
/** Get the dimensionality of the data */
172+
uint32_t cuvsIvfFlatIndexGetDim(cuvsIvfFlatIndex_t index);
173+
174+
/**
175+
* @brief Get the cluster centers corresponding to the lists [n_lists, dim]
176+
*
177+
* @param[in] res cuvsResources_t opaque C handle
178+
* @param[in] index cuvsIvfFlatIndex_t Built Ivf-Flat Index
179+
* @param[out] centers Preallocated array on host or device memory to store output, [n_lists, dim]
180+
* @return cuvsError_t
181+
*/
182+
cuvsError_t cuvsIvfFlatIndexGetCenters(cuvsResources_t res,
183+
cuvsIvfFlatIndex_t index,
184+
DLManagedTensor* centers);
185+
167186
/**
168187
* @}
169188
*/

cpp/include/cuvs/neighbors/ivf_pq.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,15 +264,16 @@ cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
264264
/** Get the number of clusters/inverted lists */
265265
uint32_t cuvsIvfPqIndexGetNLists(cuvsIvfPqIndex_t index);
266266

267-
/** Get the dimensionality of the cluster centers */
268-
uint32_t cuvsIvfPqIndexGetDimExt(cuvsIvfPqIndex_t index);
267+
/** Get the dimensionality */
268+
uint32_t cuvsIvfPqIndexGetDim(cuvsIvfPqIndex_t index);
269269

270270
/**
271271
* @brief Get the cluster centers corresponding to the lists in the original space
272272
*
273273
* @param[in] res cuvsResources_t opaque C handle
274-
* @param[in] index cuvsIvfPqIndex_t Built NN-Descent index
275-
* @param[out] centers Preallocated array on host memory to store output, [n_lists, dim_ext]
274+
* @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index
275+
* @param[out] centers Preallocated array on host or device memory to store output,
276+
* dimensions [n_lists, dim]
276277
* @return cuvsError_t
277278
*/
278279
cuvsError_t cuvsIvfPqIndexGetCenters(cuvsResources_t res,

cpp/include/cuvs/neighbors/ivf_pq.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2861,6 +2861,10 @@ void extract_centers(raft::resources const& res,
28612861
const index<int64_t>& index,
28622862
raft::device_matrix_view<float, uint32_t, raft::row_major> cluster_centers);
28632863

2864+
/** @copydoc extract_centers */
2865+
void extract_centers(raft::resources const& res,
2866+
const index<int64_t>& index,
2867+
raft::host_matrix_view<float, uint32_t, raft::row_major> cluster_centers);
28642868
/**
28652869
* @brief Helper exposing the re-computation of list sizes and related arrays if IVF lists have been
28662870
* modified externally.

cpp/src/neighbors/all_neighbors/all_neighbors_batched.cuh

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ void get_centroids_on_data_subsample(raft::resources const& res,
7171
template <typename T, typename IdxT>
7272
void single_gpu_assign_clusters(
7373
raft::resources const& res,
74-
size_t n_nearest_clusters,
74+
size_t overlap_factor,
7575
size_t n_clusters,
7676
size_t n_rows_per_batch,
7777
size_t base_row_offset,
@@ -89,10 +89,10 @@ void single_gpu_assign_clusters(
8989
auto dataset_batch_d =
9090
raft::make_device_matrix<T, IdxT, raft::row_major>(res, n_rows_per_batch, num_cols);
9191

92-
auto nearest_clusters_idx_d = raft::make_device_matrix<IdxT, int64_t, raft::row_major>(
93-
res, n_rows_per_batch, n_nearest_clusters);
94-
auto nearest_clusters_dist_d = raft::make_device_matrix<T, int64_t, raft::row_major>(
95-
res, n_rows_per_batch, n_nearest_clusters);
92+
auto nearest_clusters_idx_d =
93+
raft::make_device_matrix<IdxT, int64_t, raft::row_major>(res, n_rows_per_batch, overlap_factor);
94+
auto nearest_clusters_dist_d =
95+
raft::make_device_matrix<T, int64_t, raft::row_major>(res, n_rows_per_batch, overlap_factor);
9696

9797
std::optional<raft::device_vector_view<const T, int64_t>> norms_view;
9898
cuvs::neighbors::brute_force::index<T> brute_force_index(res, centroids, norms_view, metric);
@@ -111,21 +111,21 @@ void single_gpu_assign_clusters(
111111
raft::make_const_mdspan(dataset_batch_d.view()),
112112
nearest_clusters_idx_d.view(),
113113
nearest_clusters_dist_d.view());
114-
raft::copy(global_nearest_cluster.data_handle() + row_offset * n_nearest_clusters,
114+
raft::copy(global_nearest_cluster.data_handle() + row_offset * overlap_factor,
115115
nearest_clusters_idx_d.data_handle(),
116-
n_rows_of_current_batch * n_nearest_clusters,
116+
n_rows_of_current_batch * overlap_factor,
117117
resource::get_cuda_stream(res));
118118
}
119119
}
120120

121121
/**
122-
* Assign each data point to top n_nearest_clusters number of clusters. Loads the data in batches
122+
* Assign each data point to top overlap_factor number of clusters. Loads the data in batches
123123
* onto device for efficiency. Arguments:
124124
* - [in] res: raft resource
125125
* - [in] params: params for graph building
126126
* - [in] dataset [num_rows x num_cols]: entire dataset located on host memory
127127
* - [in] centroids [n_clusters x num_cols] : centroid vectors
128-
* - [out] global_nearest_cluster [num_rows X n_nearest_clusters] : top n_nearest_clusters closest
128+
* - [out] global_nearest_cluster [num_rows X overlap_factor] : top overlap_factor closest
129129
* clusters for each data point
130130
*/
131131
template <typename T, typename IdxT>
@@ -169,7 +169,7 @@ void assign_clusters(raft::resources const& res,
169169
size_t base_row_offset_for_this_rank = n_rows_per_cluster * base_cluster_idx;
170170

171171
single_gpu_assign_clusters(dev_res,
172-
params.n_nearest_clusters,
172+
params.overlap_factor,
173173
n_clusters_for_this_rank,
174174
n_rows_per_cluster,
175175
base_row_offset_for_this_rank,
@@ -180,7 +180,7 @@ void assign_clusters(raft::resources const& res,
180180
}
181181
} else {
182182
single_gpu_assign_clusters(res,
183-
params.n_nearest_clusters,
183+
params.overlap_factor,
184184
params.n_clusters,
185185
n_rows_per_cluster,
186186
0,
@@ -195,9 +195,9 @@ void assign_clusters(raft::resources const& res,
195195
* Getting data indices that belong to cluster
196196
* Arguments:
197197
* - [in] res: raft resource
198-
* - [in] global_nearest_cluster [num_rows X n_nearest_clusters] : top n_nearest_clusters closest
198+
* - [in] global_nearest_cluster [num_rows X overlap_factor] : top overlap_factor closest
199199
* clusters for each data point
200-
* - [out] inverted_indices [num_rows x n_nearest_clusters sized vector] : vector for data indices
200+
* - [out] inverted_indices [num_rows x overlap_factor sized vector] : vector for data indices
201201
* for each cluster
202202
* - [out] cluster_sizes [n_clusters] : cluster size for each cluster
203203
* - [out] cluster_offsets [n_clusters] : offset in inverted_indices for each cluster
@@ -210,17 +210,17 @@ void get_inverted_indices(raft::resources const& res,
210210
raft::host_vector_view<IdxT, IdxT> cluster_offsets)
211211
{
212212
// build sparse inverted indices and get number of data points for each cluster
213-
size_t num_rows = global_nearest_cluster.extent(0);
214-
size_t n_nearest_clusters = global_nearest_cluster.extent(1);
215-
size_t n_clusters = cluster_sizes.extent(0);
213+
size_t num_rows = global_nearest_cluster.extent(0);
214+
size_t overlap_factor = global_nearest_cluster.extent(1);
215+
size_t n_clusters = cluster_sizes.extent(0);
216216

217217
auto local_offsets = raft::make_host_vector<IdxT>(n_clusters);
218218

219219
std::fill(cluster_sizes.data_handle(), cluster_sizes.data_handle() + n_clusters, 0);
220220
std::fill(local_offsets.data_handle(), local_offsets.data_handle() + n_clusters, 0);
221221

222222
for (size_t i = 0; i < num_rows; i++) {
223-
for (size_t j = 0; j < n_nearest_clusters; j++) {
223+
for (size_t j = 0; j < overlap_factor; j++) {
224224
IdxT cluster_id = global_nearest_cluster(i, j);
225225
cluster_sizes(cluster_id) += 1;
226226
}
@@ -231,7 +231,7 @@ void get_inverted_indices(raft::resources const& res,
231231
cluster_offsets(i) = cluster_offsets(i - 1) + cluster_sizes(i - 1);
232232
}
233233
for (size_t i = 0; i < num_rows; i++) {
234-
for (size_t j = 0; j < n_nearest_clusters; j++) {
234+
for (size_t j = 0; j < overlap_factor; j++) {
235235
IdxT cluster_id = global_nearest_cluster(i, j);
236236
inverted_indices(cluster_offsets(cluster_id) + local_offsets(cluster_id)) = i;
237237
local_offsets(cluster_id) += 1;
@@ -389,20 +389,19 @@ void batch_build(
389389
size_t num_cols = static_cast<size_t>(dataset.extent(1));
390390
size_t k = indices.extent(1);
391391

392-
RAFT_EXPECTS(params.n_clusters > params.n_nearest_clusters,
393-
"n_nearest_clusters should be smaller than n_clusters. We recommend starting from "
394-
"n_nearest_clusters=2 and gradually increase it for better knn graph recall.");
392+
RAFT_EXPECTS(params.n_clusters > params.overlap_factor,
393+
"overlap_factor should be smaller than n_clusters. We recommend starting from "
394+
"overlap_factor=2 and gradually increase it for better knn graph recall.");
395395

396396
auto centroids = raft::make_device_matrix<T, IdxT>(handle, params.n_clusters, num_cols);
397397
get_centroids_on_data_subsample<T, IdxT>(handle, params.metric, dataset, centroids.view());
398398

399-
auto global_nearest_cluster =
400-
raft::make_host_matrix<IdxT, IdxT>(num_rows, params.n_nearest_clusters);
399+
auto global_nearest_cluster = raft::make_host_matrix<IdxT, IdxT>(num_rows, params.overlap_factor);
401400
assign_clusters<T, IdxT>(
402401
handle, params, dataset, centroids.view(), global_nearest_cluster.view());
403402

404403
auto inverted_indices =
405-
raft::make_host_vector<IdxT, IdxT, raft::row_major>(num_rows * params.n_nearest_clusters);
404+
raft::make_host_vector<IdxT, IdxT, raft::row_major>(num_rows * params.overlap_factor);
406405
auto cluster_sizes = raft::make_host_vector<IdxT, IdxT, raft::row_major>(params.n_clusters);
407406
auto cluster_offsets = raft::make_host_vector<IdxT, IdxT, raft::row_major>(params.n_clusters);
408407
get_inverted_indices(handle,

0 commit comments

Comments
 (0)