diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d4ab1e62bb..5f016ed884 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -243,6 +243,7 @@ add_library( src/neighbors/cagra_serialize_float.cu src/neighbors/cagra_serialize_int8.cu src/neighbors/cagra_serialize_uint8.cu + src/neighbors/detail/cagra/cagra_build.cpp src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -425,6 +426,7 @@ add_library( src/neighbors/ivf_pq/detail/ivf_pq_search_with_filter_float_int64_t.cu src/neighbors/ivf_pq/detail/ivf_pq_search_with_filter_int8_t_int64_t.cu src/neighbors/ivf_pq/detail/ivf_pq_search_with_filter_uint8_t_int64_t.cu + src/neighbors/nn_descent.cu src/neighbors/nn_descent_float.cu src/neighbors/nn_descent_int8.cu src/neighbors/nn_descent_uint8.cu diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index 727c39c6ef..fc01af9d6f 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -34,6 +34,8 @@ extern "C" { * */ enum cuvsCagraGraphBuildAlgo { + /* Select build algorithm automatically */ + AUTO_SELECT, /* Use IVF-PQ to build all-neighbors knn graph */ IVF_PQ, /* Experimental, use NN-Descent to build all-neighbors knn graph */ diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index 9d976d28e0..c5f2ab87d9 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -19,6 +19,8 @@ #include "common.hpp" #include #include +#include +#include #include #include #include @@ -29,6 +31,7 @@ #include #include +#include namespace cuvs::neighbors::cagra { /** @@ -37,31 +40,70 @@ namespace cuvs::neighbors::cagra { */ /** - * @brief ANN algorithm used by CAGRA to build knn graph + * @brief ANN parameters used by CAGRA to 
build knn graph * */ -enum class graph_build_algo { - /* Use IVF-PQ to build all-neighbors knn graph */ - IVF_PQ, - /* Experimental, use NN-Descent to build all-neighbors knn graph */ - NN_DESCENT +namespace graph_build_params { + +/** Specialized parameters utilizing IVF-PQ to build knn graph */ +struct ivf_pq_params { + cuvs::neighbors::ivf_pq::index_params build_params; + cuvs::neighbors::ivf_pq::search_params search_params; + float refinement_rate = 2.0f; /**< IVF-PQ search over-fetches refinement_rate times the kNN graph degree */ + + ivf_pq_params() = default; + /** + * Set default parameters based on shape of the input dataset. + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * raft::resources res; + * // create ivf_pq_params for a [N, D] dataset + * auto dataset = raft::make_device_matrix(res, N, D); + * auto pq_params = + * cagra::graph_build_params::ivf_pq_params(dataset.extents()); + * // modify/update pq_params as needed + * pq_params.build_params.add_data_on_build = true; + * @endcode + */ + ivf_pq_params(raft::matrix_extent dataset_extents, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded); }; +using nn_descent_params = cuvs::neighbors::nn_descent::index_params; +} // namespace graph_build_params + struct index_params : cuvs::neighbors::index_params { /** Degree of input graph for pruning. */ size_t intermediate_graph_degree = 128; /** Degree of output graph. */ size_t graph_degree = 64; - /** ANN algorithm to build knn graph. */ - graph_build_algo build_algo = graph_build_algo::IVF_PQ; - /** Number of Iterations to run if building with NN_DESCENT */ - size_t nn_descent_niter = 20; /** * Specify compression parameters if compression is desired. - * - * NOTE: this is experimental new API, consider it unsafe. */ std::optional compression = std::nullopt; + + /** Parameters for graph building. + * + * Set ivf_pq_params or nn_descent_params to select the graph build algorithm and control their + * parameters. 
The default (std::monostate) is to use a heuristic to decide the algorithm and its + * parameters. + * + * @code{.cpp} + * cagra::index_params params; + * // 1. Choose IVF-PQ algorithm + * params.graph_build_params = cagra::graph_build_params::ivf_pq_params(dataset.extents(), + * params.metric); + * + * // 2. Choose NN Descent algorithm for kNN graph construction + * params.graph_build_params = + * cagra::graph_build_params::nn_descent_params(params.intermediate_graph_degree); + * @endcode + */ + std::variant + graph_build_params; }; /** @@ -283,9 +325,7 @@ struct index : cuvs::neighbors::index { "Dataset and knn_graph must have equal number of rows"); update_graph(res, knn_graph); - printf("Called update_graph\n"); raft::resource::sync_stream(res); - printf("Done syncing\n"); } /** @@ -356,7 +396,6 @@ struct index : cuvs::neighbors::index { { RAFT_LOG_DEBUG("Copying CAGRA knn graph from host to device"); - printf("Copying graph...\n"); if ((graph_.extent(0) != knn_graph.extent(0)) || (graph_.extent(1) != knn_graph.extent(1))) { // clear existing memory before allocating to prevent OOM errors on large graphs if (graph_.size()) { graph_ = raft::make_device_matrix(res, 0, 0); } @@ -368,7 +407,6 @@ struct index : cuvs::neighbors::index { knn_graph.size(), raft::resource::get_cuda_stream(res)); graph_view_ = graph_.view(); - printf("Done...\n"); } private: @@ -385,32 +423,219 @@ struct index : cuvs::neighbors::index { * @defgroup cagra_cpp_index_build CAGRA index build functions * @{ */ -auto build(raft::resources const& handle, + +/** + * @brief Build the index from the dataset for efficient search. + * + * The build consists of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. 
+ * + * The following distance metrics are supported: + * - L2 + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * // use default search parameters + * cagra::search_params search_params; + * // search K nearest neighbours + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + * + * @param[in] res raft resources + * @param[in] params parameters for building the index + * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim] + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& params, raft::device_matrix_view dataset) -> cuvs::neighbors::cagra::index; -auto build(raft::resources const& handle, +/** + * @brief Build the index from the dataset for efficient search. + * + * The build consists of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. 
+ * + * The following distance metrics are supported: + * - L2 + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * // use default search parameters + * cagra::search_params search_params; + * // search K nearest neighbours + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + * + * @param[in] res raft resources + * @param[in] params parameters for building the index + * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim] + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& params, raft::host_matrix_view dataset) -> cuvs::neighbors::cagra::index; -auto build(raft::resources const& handle, +/** + * @brief Build the index from the dataset for efficient search. + * + * The build consists of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. 
+ * + * The following distance metrics are supported: + * - L2 + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * // use default search parameters + * cagra::search_params search_params; + * // search K nearest neighbours + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + * + * @param[in] res raft resources + * @param[in] params parameters for building the index + * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim] + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& params, raft::device_matrix_view dataset) -> cuvs::neighbors::cagra::index; -auto build(raft::resources const& handle, +/** + * @brief Build the index from the dataset for efficient search. + * + * The build consists of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. 
+ * + * The following distance metrics are supported: + * - L2 + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * // use default search parameters + * cagra::search_params search_params; + * // search K nearest neighbours + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + * + * @param[in] res raft resources + * @param[in] params parameters for building the index + * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim] + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& params, raft::host_matrix_view dataset) -> cuvs::neighbors::cagra::index; -auto build(raft::resources const& handle, +/** + * @brief Build the index from the dataset for efficient search. + * + * The build consists of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. 
+ * + * The following distance metrics are supported: + * - L2 + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * // use default search parameters + * cagra::search_params search_params; + * // search K nearest neighbours + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + * + * @param[in] res raft resources + * @param[in] params parameters for building the index + * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim] + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& params, raft::device_matrix_view dataset) -> cuvs::neighbors::cagra::index; -auto build(raft::resources const& handle, +/** + * @brief Build the index from the dataset for efficient search. + * + * The build consists of two steps: build an intermediate knn-graph, and optimize it to + * create the final graph. The index_params struct controls the node degree of these + * graphs. 
+ * + * The following distance metrics are supported: + * - L2 + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * // use default search parameters + * cagra::search_params search_params; + * // search K nearest neighbours + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); + * cagra::search(res, search_params, index, queries, neighbors, distances); + * @endcode + * + * @param[in] res raft resources + * @param[in] params parameters for building the index + * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim] + * + * @return the constructed cagra index + */ +auto build(raft::resources const& res, const cuvs::neighbors::cagra::index_params& params, raft::host_matrix_view dataset) -> cuvs::neighbors::cagra::index; @@ -418,56 +643,77 @@ auto build(raft::resources const& handle, * @} */ -void build_device(raft::resources const& handle, - const cuvs::neighbors::cagra::index_params& params, - raft::device_matrix_view dataset, - cuvs::neighbors::cagra::index& idx); - -void build_host(raft::resources const& handle, - const cuvs::neighbors::cagra::index_params& params, - raft::host_matrix_view dataset, - cuvs::neighbors::cagra::index& idx); - -void build_device(raft::resources const& handle, - const cuvs::neighbors::cagra::index_params& params, - raft::device_matrix_view dataset, - cuvs::neighbors::cagra::index& idx); - -void build_host(raft::resources const& handle, - const cuvs::neighbors::cagra::index_params& params, - raft::host_matrix_view dataset, - cuvs::neighbors::cagra::index& idx); - -void build_device(raft::resources const& handle, - const cuvs::neighbors::cagra::index_params& params, - raft::device_matrix_view dataset, - cuvs::neighbors::cagra::index& idx); - 
-void build_host(raft::resources const& handle, - const cuvs::neighbors::cagra::index_params& params, - raft::host_matrix_view dataset, - cuvs::neighbors::cagra::index& idx); - /** * @defgroup cagra_cpp_index_search CAGRA search functions * @{ */ -void search(raft::resources const& handle, +/** + * @brief Search ANN using the constructed index. + * + * See the [cagra::build](#cagra::build) documentation for a usage example. + * + * @tparam T data element type + * @tparam IdxT type of the indices + * + * @param[in] res raft resources + * @param[in] params configure the search + * @param[in] index cagra index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + */ +void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, const cuvs::neighbors::cagra::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances); -void search(raft::resources const& handle, +/** + * @brief Search ANN using the constructed index. + * + * See the [cagra::build](#cagra::build) documentation for a usage example. 
+ * + * @tparam T data element type + * @tparam IdxT type of the indices + * + * @param[in] res raft resources + * @param[in] params configure the search + * @param[in] index cagra index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + */ +void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, const cuvs::neighbors::cagra::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances); -void search(raft::resources const& handle, +/** + * @brief Search ANN using the constructed index. + * + * See the [cagra::build](#cagra::build) documentation for a usage example. + * + * @tparam T data element type + * @tparam IdxT type of the indices + * + * @param[in] res raft resources + * @param[in] params configure the search + * @param[in] index cagra index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + */ +void search(raft::resources const& res, cuvs::neighbors::cagra::search_params const& params, const cuvs::neighbors::cagra::index& index, raft::device_matrix_view queries, diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp b/cpp/include/cuvs/neighbors/ivf_pq.hpp index 8493882b43..d7696a37b9 100644 --- a/cpp/include/cuvs/neighbors/ivf_pq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp @@ -102,31 +102,19 @@ struct index_params : cuvs::neighbors::index_params { * Creates index_params based on shape of the input dataset. 
* Usage example: * @code{.cpp} - * using namespace raft::neighbors; + * using namespace cuvs::neighbors; * raft::resources res; * // create index_params for a [N. D] dataset and have InnerProduct as the distance metric * auto dataset = raft::make_device_matrix(res, N, D); * ivf_pq::index_params index_params = - * ivf_pq::index_params::from_dataset(dataset.view(), raft::distance::InnerProduct); + * ivf_pq::index_params::from_dataset(dataset.extents(), cuvs::distance::DistanceType::InnerProduct); * // modify/update index_params as needed * index_params.add_data_on_build = true; * @endcode */ - template static index_params from_dataset( - raft::mdspan, raft::row_major, Accessor> dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) - { - index_params params; - params.n_lists = - dataset.extent(0) < 4 * 2500 ? 4 : static_cast(std::sqrt(dataset.extent(0))); - params.pq_dim = - raft::round_up_safe(static_cast(dataset.extent(1) / 4), static_cast(8)); - params.pq_bits = 8; - params.kmeans_trainset_fraction = dataset.extent(0) < 10000 ? 1 : 0.1; - params.metric = metric; - return params; - } + raft::matrix_extent dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded); }; /** * @} diff --git a/cpp/include/cuvs/neighbors/nn_descent.hpp b/cpp/include/cuvs/neighbors/nn_descent.hpp index 1f216d68f4..9f43001778 100644 --- a/cpp/include/cuvs/neighbors/nn_descent.hpp +++ b/cpp/include/cuvs/neighbors/nn_descent.hpp @@ -53,6 +53,15 @@ struct index_params : cuvs::neighbors::index_params { size_t intermediate_graph_degree = 128; // Degree of input graph for pruning. size_t max_iterations = 20; // Number of nn-descent iterations. float termination_threshold = 0.0001; // Termination threshold of nn-descent. 
+ + /** @brief Construct NN descent parameters for a specific kNN graph degree + * + * @param graph_degree output graph degree + */ + index_params(size_t graph_degree = 64) + : graph_degree(graph_degree), intermediate_graph_degree(1.5 * graph_degree) + { + } }; /** @@ -320,6 +329,8 @@ auto build(raft::resources const& res, raft::device_matrix_view dataset) -> cuvs::neighbors::nn_descent::index; +/** @} */ + /** * @brief Build nn-descent Index with dataset in host memory * @@ -353,7 +364,16 @@ auto build(raft::resources const& res, -> cuvs::neighbors::nn_descent::index; /** - * @} + * @brief Test if we have enough GPU memory to run NN descent algorithm. + * + * @param res + * @param dataset shape of the dataset + * @param idx_size the size of index type in bytes + * @return true if enough GPU memory can be allocated + * @return false otherwise */ +bool has_enough_device_memory(raft::resources const& res, + raft::matrix_extent dataset, + size_t idx_size = 4); } // namespace cuvs::neighbors::nn_descent \ No newline at end of file diff --git a/cpp/src/neighbors/cagra.cuh b/cpp/src/neighbors/cagra.cuh index 997f369a95..0e714370a4 100644 --- a/cpp/src/neighbors/cagra.cuh +++ b/cpp/src/neighbors/cagra.cuh @@ -75,18 +75,14 @@ namespace cuvs::neighbors::cagra { * @param[in] res raft resources * @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim] * @param[out] knn_graph a host matrix view to store the output knn graph [n_rows, graph_degree] - * @param[in] refine_rate (optional) refinement rate for ivf-pq search - * @param[in] build_params (optional) ivf_pq index building parameters for knn graph - * @param[in] search_params (optional) ivf_pq search parameters + * @param[in] ivf_pq_params ivf-pq parameters for graph build */ template void build_knn_graph( raft::resources const& res, raft::mdspan, raft::row_major, accessor> dataset, raft::host_matrix_view knn_graph, - std::optional refine_rate = std::nullopt, - std::optional build_params = 
std::nullopt, - std::optional search_params = std::nullopt) + cagra::graph_build_params::ivf_pq_params ivf_pq_params) { using internal_IdxT = typename std::make_unsigned::type; @@ -98,8 +94,12 @@ void build_knn_graph( raft::mdspan, raft::row_major, accessor>( dataset.data_handle(), dataset.extent(0), dataset.extent(1)); - cagra::detail::build_knn_graph( - res, dataset_internal, knn_graph_internal, refine_rate, build_params, search_params); + cagra::detail::build_knn_graph(res, + dataset_internal, + knn_graph_internal, + ivf_pq_params.refinement_rate, + ivf_pq_params.build_params, + ivf_pq_params.search_params); } /** diff --git a/cpp/src/neighbors/cagra_build_float.cu b/cpp/src/neighbors/cagra_build_float.cu index c5e331925b..b990d1b329 100644 --- a/cpp/src/neighbors/cagra_build_float.cu +++ b/cpp/src/neighbors/cagra_build_float.cu @@ -19,37 +19,21 @@ namespace cuvs::neighbors::cagra { -#define RAFT_INST_CAGRA_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ - { \ - return cuvs::neighbors::cagra::build(handle, params, dataset); \ - } \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ - { \ - return cuvs::neighbors::cagra::build(handle, params, dataset); \ - } \ - \ - void build_device(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::cagra::index& idx) \ - { \ - idx = build(handle, params, dataset); \ - } \ - \ - void build_host(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::host_matrix_view dataset, \ - cuvs::neighbors::cagra::index& idx) \ - { \ - idx = build(handle, params, dataset); \ +#define RAFT_INST_CAGRA_BUILD(T, IdxT) \ + auto 
build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::build(handle, params, dataset); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::build(handle, params, dataset); \ } RAFT_INST_CAGRA_BUILD(float, uint32_t); diff --git a/cpp/src/neighbors/cagra_build_int8.cu b/cpp/src/neighbors/cagra_build_int8.cu index e48286b0f5..624ebd060a 100644 --- a/cpp/src/neighbors/cagra_build_int8.cu +++ b/cpp/src/neighbors/cagra_build_int8.cu @@ -19,37 +19,21 @@ namespace cuvs::neighbors::cagra { -#define RAFT_INST_CAGRA_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ - { \ - return cuvs::neighbors::cagra::build(handle, params, dataset); \ - } \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ - { \ - return cuvs::neighbors::cagra::build(handle, params, dataset); \ - } \ - \ - void build_device(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::cagra::index& idx) \ - { \ - idx = build(handle, params, dataset); \ - } \ - \ - void build_host(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::host_matrix_view dataset, \ - cuvs::neighbors::cagra::index& idx) \ - { \ - idx = build(handle, params, dataset); \ +#define RAFT_INST_CAGRA_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + 
raft::device_matrix_view dataset) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::build(handle, params, dataset); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::build(handle, params, dataset); \ } RAFT_INST_CAGRA_BUILD(int8_t, uint32_t); diff --git a/cpp/src/neighbors/cagra_build_uint8.cu b/cpp/src/neighbors/cagra_build_uint8.cu index 796fdda745..8fd806c64e 100644 --- a/cpp/src/neighbors/cagra_build_uint8.cu +++ b/cpp/src/neighbors/cagra_build_uint8.cu @@ -19,37 +19,21 @@ namespace cuvs::neighbors::cagra { -#define RAFT_INST_CAGRA_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ - { \ - return cuvs::neighbors::cagra::build(handle, params, dataset); \ - } \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ - { \ - return cuvs::neighbors::cagra::build(handle, params, dataset); \ - } \ - \ - void build_device(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::cagra::index& idx) \ - { \ - idx = build(handle, params, dataset); \ - } \ - \ - void build_host(raft::resources const& handle, \ - const cuvs::neighbors::cagra::index_params& params, \ - raft::host_matrix_view dataset, \ - cuvs::neighbors::cagra::index& idx) \ - { \ - idx = build(handle, params, dataset); \ +#define RAFT_INST_CAGRA_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::cagra::index \ + { \ + return 
cuvs::neighbors::cagra::build(handle, params, dataset); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::cagra::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::build(handle, params, dataset); \ } RAFT_INST_CAGRA_BUILD(uint8_t, uint32_t); diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index cadda19d5d..ac458a1aa3 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -38,12 +38,26 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* auto res_ptr = reinterpret_cast(res); auto index = new cuvs::neighbors::cagra::index(*res_ptr); - auto build_params = cuvs::neighbors::cagra::index_params(); - build_params.intermediate_graph_degree = params.intermediate_graph_degree; - build_params.graph_degree = params.graph_degree; - build_params.build_algo = - static_cast(params.build_algo); - build_params.nn_descent_niter = params.nn_descent_niter; + auto index_params = cuvs::neighbors::cagra::index_params(); + index_params.intermediate_graph_degree = params.intermediate_graph_degree; + index_params.graph_degree = params.graph_degree; + + switch (params.build_algo) { + case cuvsCagraGraphBuildAlgo::AUTO_SELECT: break; /* graph_build_params stays std::monostate */ + case cuvsCagraGraphBuildAlgo::IVF_PQ: { + auto dataset_extent = raft::matrix_extent(dataset.shape[0], dataset.shape[1]); + index_params.graph_build_params = + cuvs::neighbors::cagra::graph_build_params::ivf_pq_params(dataset_extent); + break; + } + case cuvsCagraGraphBuildAlgo::NN_DESCENT: { + auto nn_descent_params = + cuvs::neighbors::nn_descent::index_params(index_params.intermediate_graph_degree); + nn_descent_params.max_iterations = params.nn_descent_niter; + index_params.graph_build_params = nn_descent_params; + break; + } + } if (auto* cparams = params.compression; cparams != nullptr) 
{ auto compression_params = cuvs::neighbors::vpq_params(); @@ -53,17 +67,17 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* compression_params.kmeans_n_iters = cparams->kmeans_n_iters; compression_params.vq_kmeans_trainset_fraction = cparams->vq_kmeans_trainset_fraction; compression_params.pq_kmeans_trainset_fraction = cparams->pq_kmeans_trainset_fraction; - build_params.compression.emplace(compression_params); + index_params.compression.emplace(compression_params); } if (cuvs::core::is_dlpack_device_compatible(dataset)) { using mdspan_type = raft::device_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); - cuvs::neighbors::cagra::build_device(*res_ptr, build_params, mds, *index); + *index = cuvs::neighbors::cagra::build(*res_ptr, index_params, mds); } else if (cuvs::core::is_dlpack_host_compatible(dataset)) { using mdspan_type = raft::host_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); - cuvs::neighbors::cagra::build_host(*res_ptr, build_params, mds, *index); + *index = cuvs::neighbors::cagra::build(*res_ptr, index_params, mds); } return index; } diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cpp b/cpp/src/neighbors/detail/cagra/cagra_build.cpp new file mode 100644 index 0000000000..7ea45d0638 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +namespace cuvs::neighbors::cagra::graph_build_params { +ivf_pq_params::ivf_pq_params(raft::matrix_extent dataset_extents, + cuvs::distance::DistanceType metric) +{ + build_params = cuvs::neighbors::ivf_pq::index_params::from_dataset(dataset_extents, metric); + + search_params = cuvs::neighbors::ivf_pq::search_params{}; + search_params.n_probes = std::max(10, build_params.n_lists * 0.01); + search_params.lut_dtype = CUDA_R_16F; + search_params.internal_distance_dtype = CUDA_R_16F; + + refinement_rate = 2; +} +} // namespace cuvs::neighbors::cagra::graph_build_params \ No newline at end of file diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 0ca97b9ca9..a50ac2e651 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -49,17 +49,85 @@ namespace cuvs::neighbors::cagra::detail { static const std::string RAFT_NAME = "raft"; +template +void write_to_graph(raft::host_matrix_view knn_graph, + raft::host_matrix_view neighbors_host_view, + size_t& num_self_included, + size_t batch_size, + size_t batch_offset) +{ + uint32_t node_degree = knn_graph.extent(1); + size_t top_k = neighbors_host_view.extent(1); + // omit itself & write out + for (std::size_t i = 0; i < batch_size; i++) { + size_t vec_idx = i + batch_offset; + for (std::size_t j = 0, num_added = 0; j < top_k && num_added < node_degree; j++) { + const auto v = neighbors_host_view(i, j); + if (static_cast(v) == vec_idx) { + num_self_included++; + continue; + } + knn_graph(vec_idx, num_added) = v; + num_added++; + } + } +} + +template +void refine_host_and_write_graph( + raft::resources const& res, + raft::host_matrix& queries_host, + raft::host_matrix& neighbors_host, + raft::host_matrix& refined_neighbors_host, + raft::host_matrix& refined_distances_host, + raft::mdspan, raft::row_major, accessor> dataset, + raft::host_matrix_view knn_graph, 
+ cuvs::distance::DistanceType metric, + size_t& num_self_included, + size_t batch_size, + size_t batch_offset, + int top_k, + int gpu_top_k) +{ + bool do_refine = top_k != gpu_top_k; + + auto refined_neighbors_host_view = raft::make_host_matrix_view( + do_refine ? refined_neighbors_host.data_handle() : neighbors_host.data_handle(), + batch_size, + top_k); + + if (do_refine) { + // needed for compilation as this routine will also be run for device data with !do_refine + if constexpr (raft::is_host_mdspan_v) { + auto queries_host_view = raft::make_host_matrix_view( + queries_host.data_handle(), batch_size, dataset.extent(1)); + auto neighbors_host_view = raft::make_host_matrix_view( + neighbors_host.data_handle(), batch_size, neighbors_host.extent(1)); + auto refined_distances_host_view = raft::make_host_matrix_view( + refined_distances_host.data_handle(), batch_size, top_k); + cuvs::neighbors::refine(res, + dataset, + queries_host_view, + neighbors_host_view, + refined_neighbors_host_view, + refined_distances_host_view, + metric); + } + } + + write_to_graph( + knn_graph, refined_neighbors_host_view, num_self_included, batch_size, batch_offset); +} + template void build_knn_graph( raft::resources const& res, raft::mdspan, raft::row_major, accessor> dataset, raft::host_matrix_view knn_graph, - std::optional refine_rate = std::nullopt, - std::optional build_params = std::nullopt, - std::optional search_params = std::nullopt) + cuvs::neighbors::cagra::graph_build_params::ivf_pq_params pq) { - RAFT_EXPECTS(!build_params || build_params->metric == cuvs::distance::DistanceType::L2Expanded || - build_params->metric == cuvs::distance::DistanceType::InnerProduct, + RAFT_EXPECTS(pq.build_params.metric == cuvs::distance::DistanceType::L2Expanded || + pq.build_params.metric == cuvs::distance::DistanceType::InnerProduct, "Currently only L2Expanded or InnerProduct metric are supported"); uint32_t node_degree = knn_graph.extent(1); @@ -69,10 +137,6 @@ void build_knn_graph( 
size_t(dataset.extent(1)), node_degree); - if (!build_params) { - build_params = cuvs::neighbors::ivf_pq::index_params::from_dataset(dataset); - } - // Make model name const std::string model_name = [&]() { char model_name[1024]; @@ -81,29 +145,25 @@ void build_knn_graph( "IVF-PQ", static_cast(dataset.extent(0)), static_cast(dataset.extent(1)), - build_params->n_lists, - build_params->pq_dim, - build_params->pq_bits, - build_params->kmeans_n_iters, - build_params->metric, - static_cast(build_params->codebook_kind)); + pq.build_params.n_lists, + pq.build_params.pq_dim, + pq.build_params.pq_bits, + pq.build_params.kmeans_n_iters, + pq.build_params.metric, + static_cast(pq.build_params.codebook_kind)); return std::string(model_name); }(); RAFT_LOG_DEBUG("# Building IVF-PQ index %s", model_name.c_str()); - auto index = cuvs::neighbors::ivf_pq::detail::build(res, *build_params, dataset); + auto index = + cuvs::neighbors::ivf_pq::detail::build(res, pq.build_params, dataset); // // search top (k + 1) neighbors // - if (!search_params) { - search_params = cuvs::neighbors::ivf_pq::search_params{}; - search_params->n_probes = std::min(dataset.extent(1) * 2, build_params->n_lists); - search_params->lut_dtype = CUDA_R_8U; - search_params->internal_distance_dtype = CUDA_R_32F; - } + const auto top_k = node_degree + 1; - uint32_t gpu_top_k = node_degree * refine_rate.value_or(2.0f); + uint32_t gpu_top_k = node_degree * pq.refinement_rate; gpu_top_k = std::min(std::max(gpu_top_k, top_k), dataset.extent(0)); const auto num_queries = dataset.extent(0); const auto max_batch_size = 1024; @@ -113,7 +173,7 @@ void build_knn_graph( top_k, gpu_top_k, max_batch_size, - search_params->n_probes); + pq.search_params.n_probes); auto distances = raft::make_device_matrix(res, max_batch_size, gpu_top_k); auto neighbors = raft::make_device_matrix(res, max_batch_size, gpu_top_k); @@ -142,6 +202,10 @@ void build_knn_graph( size_t next_report_offset = 0; size_t d_report_offset = dataset.extent(0) / 
100; // Report progress in 1% steps. + bool async_host_processing = raft::is_host_mdspan_v || top_k == gpu_top_k; + size_t previous_batch_size = 0; + size_t previous_batch_offset = 0; + for (const auto& batch : vec_batches) { // Map int64_t to uint32_t because ivf_pq requires the latter. // TODO(tfeher): remove this mapping once ivf_pq accepts mdspan with int64_t index type @@ -153,33 +217,62 @@ void build_knn_graph( distances.data_handle(), batch.size(), distances.extent(1)); cuvs::neighbors::ivf_pq::search( - res, *search_params, index, queries_view, neighbors_view, distances_view); - if constexpr (raft::is_host_mdspan_v) { + res, pq.search_params, index, queries_view, neighbors_view, distances_view); + + if (async_host_processing) { + // process previous batch async on host + // NOTE: the async path also covers disabled refinement (top_k == gpu_top_k) + if (previous_batch_size > 0) { + refine_host_and_write_graph(res, + queries_host, + neighbors_host, + refined_neighbors_host, + refined_distances_host, + dataset, + knn_graph, + pq.build_params.metric, + num_self_included, + previous_batch_size, + previous_batch_offset, + top_k, + gpu_top_k); + } + + // copy next batch to host raft::copy(neighbors_host.data_handle(), neighbors.data_handle(), neighbors_view.size(), raft::resource::get_cuda_stream(res)); - raft::copy(queries_host.data_handle(), - batch.data(), - queries_view.size(), - raft::resource::get_cuda_stream(res)); - auto queries_host_view = raft::make_host_matrix_view( - queries_host.data_handle(), batch.size(), batch.row_width()); - auto neighbors_host_view = raft::make_host_matrix_view( - neighbors_host.data_handle(), batch.size(), neighbors.extent(1)); - auto refined_neighbors_host_view = raft::make_host_matrix_view( - refined_neighbors_host.data_handle(), batch.size(), top_k); - auto refined_distances_host_view = raft::make_host_matrix_view( - refined_distances_host.data_handle(), batch.size(), top_k); + if (top_k != gpu_top_k) { + // can be skipped 
for disabled refinement + raft::copy(queries_host.data_handle(), + batch.data(), + queries_view.size(), + raft::resource::get_cuda_stream(res)); + } + + previous_batch_size = batch.size(); + previous_batch_offset = batch.offset(); + + // we need to ensure the copy operations are done prior using the host data raft::resource::sync_stream(res); - cuvs::neighbors::refine(res, - dataset, - queries_host_view, - neighbors_host_view, - refined_neighbors_host_view, - refined_distances_host_view, - build_params->metric); + // process last batch + if (previous_batch_offset + previous_batch_size == (size_t)num_queries) { + refine_host_and_write_graph(res, + queries_host, + neighbors_host, + refined_neighbors_host, + refined_distances_host, + dataset, + knn_graph, + pq.build_params.metric, + num_self_included, + previous_batch_size, + previous_batch_offset, + top_k, + gpu_top_k); + } } else { auto neighbor_candidates_view = raft::make_device_matrix_view( neighbors.data_handle(), batch.size(), gpu_top_k); @@ -196,26 +289,17 @@ void build_knn_graph( neighbor_candidates_view, refined_neighbors_view, refined_distances_view, - build_params->metric); + pq.build_params.metric); raft::copy(refined_neighbors_host.data_handle(), refined_neighbors_view.data_handle(), refined_neighbors_view.size(), raft::resource::get_cuda_stream(res)); raft::resource::sync_stream(res); - } - // omit itself & write out - // TODO(tfeher): do this in parallel with GPU processing of next batch - for (std::size_t i = 0; i < batch.size(); i++) { - size_t vec_idx = i + batch.offset(); - for (std::size_t j = 0, num_added = 0; j < top_k && num_added < node_degree; j++) { - const auto v = refined_neighbors_host(i, j); - if (static_cast(v) == vec_idx) { - num_self_included++; - continue; - } - knn_graph(vec_idx, num_added) = v; - num_added++; - } + + auto refined_neighbors_host_view = raft::make_host_matrix_view( + refined_neighbors_host.data_handle(), batch.size(), top_k); + write_to_graph( + knn_graph, 
refined_neighbors_host_view, num_self_included, batch.size(), batch.offset()); } size_t num_queries_done = batch.offset() + batch.size(); @@ -303,12 +387,7 @@ template build( raft::resources const& res, const index_params& params, - raft::mdspan, raft::row_major, Accessor> dataset, - std::optional nn_descent_params = std::nullopt, - std::optional refine_rate = std::nullopt, - std::optional pq_build_params = std::nullopt, - std::optional search_params = std::nullopt, - bool construct_index_with_dataset = true) + raft::mdspan, raft::row_major, Accessor> dataset) { size_t intermediate_degree = params.intermediate_graph_degree; size_t graph_degree = params.graph_degree; @@ -330,20 +409,45 @@ index build( std::optional> knn_graph( raft::make_host_matrix(dataset.extent(0), intermediate_degree)); - if (params.build_algo == graph_build_algo::IVF_PQ) { - build_knn_graph(res, dataset, knn_graph->view(), refine_rate, pq_build_params, search_params); + // Set default value in case knn_build_params is not defined. + auto knn_build_params = params.graph_build_params; + if (std::holds_alternative(params.graph_build_params)) { + // Heuristic to decide default build algo and its params. 
+ if (params.metric == cuvs::distance::DistanceType::L2Expanded && + cuvs::neighbors::nn_descent::has_enough_device_memory( + res, dataset.extents(), sizeof(IdxT))) { + RAFT_LOG_DEBUG("NN descent solver"); + knn_build_params = cagra::graph_build_params::nn_descent_params(intermediate_degree); + } else { + RAFT_LOG_DEBUG("Selecting IVF-PQ solver"); + knn_build_params = cagra::graph_build_params::ivf_pq_params(dataset.extents(), params.metric); + } + } + + // Dispatch based on graph_build_params + if (std::holds_alternative(knn_build_params)) { + auto ivf_pq_params = + std::get(knn_build_params); + build_knn_graph(res, dataset, knn_graph->view(), ivf_pq_params); } else { RAFT_EXPECTS( params.metric == cuvs::distance::DistanceType::L2Expanded, "L2Expanded is the only distance metrics supported for CAGRA build with nn_descent"); - // Use nn-descent to build CAGRA knn graph - if (!nn_descent_params) { - nn_descent_params = cuvs::neighbors::nn_descent::index_params(); - nn_descent_params->graph_degree = intermediate_degree; - nn_descent_params->intermediate_graph_degree = 1.5 * intermediate_degree; - nn_descent_params->max_iterations = params.nn_descent_niter; + auto nn_descent_params = + std::get(knn_build_params); + + if (nn_descent_params.graph_degree != intermediate_degree) { + RAFT_LOG_WARN( + "Graph degree (%lu) for nn-descent needs to match cagra intermediate graph degree (%lu), " + "aligning " + "nn-descent graph_degree.", + nn_descent_params.graph_degree, + intermediate_degree); + nn_descent_params = cagra::graph_build_params::nn_descent_params(intermediate_degree); } - build_knn_graph(res, dataset, knn_graph->view(), *nn_descent_params); + + // Use nn-descent to build CAGRA knn graph + build_knn_graph(res, dataset, knn_graph->view(), nn_descent_params); } auto cagra_graph = raft::make_host_matrix(dataset.extent(0), graph_degree); @@ -355,25 +459,27 @@ index build( knn_graph.reset(); RAFT_LOG_INFO("Graph optimized, creating index"); + // Construct an index from 
dataset and optimized knn graph. - if (construct_index_with_dataset) { - if (params.compression.has_value()) { - RAFT_EXPECTS(params.metric == cuvs::distance::DistanceType::L2Expanded, - "VPQ compression is only supported with L2Expanded distance mertric"); - index idx(res, params.metric); - idx.update_graph(res, raft::make_const_mdspan(cagra_graph.view())); - idx.update_dataset( - res, - // TODO: hardcoding codebook math to `half`, we can do runtime dispatching later - cuvs::neighbors::vpq_build( - res, *params.compression, dataset)); - - return idx; - } + if (params.compression.has_value()) { + RAFT_EXPECTS(params.metric == cuvs::distance::DistanceType::L2Expanded, + "VPQ compression is only supported with L2Expanded distance mertric"); + index idx(res, params.metric); + idx.update_graph(res, raft::make_const_mdspan(cagra_graph.view())); + idx.update_dataset( + res, + // TODO: hardcoding codebook math to `half`, we can do runtime dispatching later + cuvs::neighbors::vpq_build( + res, *params.compression, dataset)); + + return idx; + } + try { return index(res, params.metric, dataset, raft::make_const_mdspan(cagra_graph.view())); - } else { - // We just add the graph. User is expected to update dataset separately. This branch is used - // if user needs special control of memory allocations for the dataset. + } catch (std::bad_alloc& e) { + RAFT_LOG_DEBUG("Insufficient GPU memory to construct CAGRA index with dataset on GPU"); + // We just add the graph. User is expected to update dataset separately (e.g allocating in + // managed memory). 
index idx(res, params.metric); idx.update_graph(res, raft::make_const_mdspan(cagra_graph.view())); return idx; diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index df703a04e4..4947d3148b 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -62,19 +62,16 @@ void serialize(raft::resources const& res, dtype_string.resize(4); os << dtype_string; - printf("Inside serialize...\n"); raft::serialize_scalar(res, os, serialization_version); raft::serialize_scalar(res, os, index_.size()); raft::serialize_scalar(res, os, index_.dim()); raft::serialize_scalar(res, os, index_.graph_degree()); raft::serialize_scalar(res, os, index_.metric()); - printf("Serializing mdspan\n"); raft::serialize_mdspan(res, os, index_.graph()); include_dataset &= (index_.data().n_rows() > 0); - printf("Serializing include dataset\n"); raft::serialize_scalar(res, os, include_dataset); if (include_dataset) { RAFT_LOG_INFO("Saving CAGRA index with dataset"); diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 31a26aba4c..8c5767c50c 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -32,8 +32,6 @@ #include #include -#include - #include #include #include diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index 3b0e475f4b..804f25d028 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -1673,6 +1673,9 @@ auto build(raft::resources const& handle, std::is_same_v, "Unsupported data type"); + std::cout << "using ivf_pq::index_params nrows " << (int)dataset.extent(0) << ", dim " + << (int)dataset.extent(1) << ", n_lits " << (int)params.n_lists << ", pq_dim " + << (int)params.pq_dim << std::endl; RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset"); RAFT_EXPECTS(n_rows >= 
params.n_lists, "number of rows can't be less than n_lists"); diff --git a/cpp/src/neighbors/ivf_pq_index.cpp b/cpp/src/neighbors/ivf_pq_index.cpp index 286838387c..ee6c5dfb43 100644 --- a/cpp/src/neighbors/ivf_pq_index.cpp +++ b/cpp/src/neighbors/ivf_pq_index.cpp @@ -17,6 +17,21 @@ #include namespace cuvs::neighbors::ivf_pq { +index_params index_params::from_dataset(raft::matrix_extent dataset, + cuvs::distance::DistanceType metric) +{ + index_params params; + params.n_lists = + dataset.extent(0) < 4 * 2500 ? 4 : static_cast(std::sqrt(dataset.extent(0))); + params.n_lists = std::min(params.n_lists, dataset.extent(0)); + params.pq_dim = + raft::round_up_safe(static_cast(dataset.extent(1) / 4), static_cast(8)); + if (params.pq_dim == 0) params.pq_dim = 8; + params.pq_bits = 8; + params.kmeans_trainset_fraction = dataset.extent(0) < 10000 ? 1 : 0.1; + params.metric = metric; + return params; +} template index::index(raft::resources const& handle, const index_params& params, uint32_t dim) diff --git a/cpp/src/neighbors/nn_descent.cu b/cpp/src/neighbors/nn_descent.cu new file mode 100644 index 0000000000..3fa87d7ab9 --- /dev/null +++ b/cpp/src/neighbors/nn_descent.cu @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "detail/nn_descent.cuh" +#include +#include + +namespace cuvs::neighbors::nn_descent { + +/** + * @brief Test if we have enough GPU memory to run NN descent algorithm. 
+ * * + * @param res + * @param dataset shape of the dataset + * @param idx_size the size of index type in bytes + * @return true if enough GPU memory could be allocated + * @return false otherwise + */ +bool has_enough_device_memory(raft::resources const& res, + raft::matrix_extent dataset, + size_t idx_size) +{ + using DistData_t = float; + try { + auto d_data_ = raft::make_device_matrix<__half, size_t, raft::row_major>( + res, dataset.extent(0), dataset.extent(1)); + auto l2_norms_ = raft::make_device_vector(res, dataset.extent(0)); + auto graph_buffer_ = raft::make_device_vector( + res, dataset.extent(0) * idx_size * detail::DEGREE_ON_DEVICE); + + auto dists_buffer_ = raft::make_device_matrix( + res, dataset.extent(0), detail::DEGREE_ON_DEVICE); + + auto d_locks_ = raft::make_device_vector(res, dataset.extent(0)); + + auto d_list_sizes_new_ = raft::make_device_vector(res, dataset.extent(0)); + auto d_list_sizes_old_ = raft::make_device_vector(res, dataset.extent(0)); + RAFT_LOG_DEBUG("Sufficient memory for NN descent"); + return true; + } catch (std::bad_alloc& e) { + RAFT_LOG_DEBUG("Insufficient memory for NN descent"); + return false; + } +} + +} // namespace cuvs::neighbors::nn_descent diff --git a/cpp/src/neighbors/refine/refine_host.hpp b/cpp/src/neighbors/refine/refine_host.hpp index 4f293e5b81..dcaccd5a87 100644 --- a/cpp/src/neighbors/refine/refine_host.hpp +++ b/cpp/src/neighbors/refine/refine_host.hpp @@ -101,32 +101,36 @@ template n_queries) { suggested_n_threads = n_queries; } -#pragma omp parallel num_threads(suggested_n_threads) { - std::vector> refined_pairs(orig_k); - for (size_t i = omp_get_thread_num(); i < n_queries; i += omp_get_num_threads()) { - // Compute the refined distance using original dataset vectors - const DataT* query = queries.data_handle() + dim * i; - for (size_t j = 0; j < orig_k; j++) { - IdxT id = neighbor_candidates(i, j); - DistanceT distance = 0.0; - if (static_cast(id) >= n_rows) { - distance = 
std::numeric_limits::max(); - } else { - const DataT* row = dataset.data_handle() + dim * id; - for (size_t k = 0; k < dim; k++) { - distance += DC::template eval(query[k], row[k]); + std::vector>> refined_pairs( + suggested_n_threads, std::vector>(orig_k)); +#pragma omp parallel num_threads(suggested_n_threads) + { + auto tid = omp_get_thread_num(); + for (size_t i = tid; i < n_queries; i += omp_get_num_threads()) { + // Compute the refined distance using original dataset vectors + const DataT* query = queries.data_handle() + dim * i; + for (size_t j = 0; j < orig_k; j++) { + IdxT id = neighbor_candidates(i, j); + DistanceT distance = 0.0; + if (static_cast(id) >= n_rows) { + distance = std::numeric_limits::max(); + } else { + const DataT* row = dataset.data_handle() + dim * id; + for (size_t k = 0; k < dim; k++) { + distance += DC::template eval(query[k], row[k]); + } } + refined_pairs[tid][j] = std::make_tuple(distance, id); } - refined_pairs[j] = std::make_tuple(distance, id); - } - // Sort the query neighbors by their refined distances - std::sort(refined_pairs.begin(), refined_pairs.end()); - // Store first refined_k neighbors - for (size_t j = 0; j < refined_k; j++) { - indices(i, j) = std::get<1>(refined_pairs[j]); - if (distances.data_handle() != nullptr) { - distances(i, j) = DC::template postprocess(std::get<0>(refined_pairs[j])); + // Sort the query neighbors by their refined distances + std::sort(refined_pairs[tid].begin(), refined_pairs[tid].end()); + // Store first refined_k neighbors + for (size_t j = 0; j < refined_k; j++) { + indices(i, j) = std::get<1>(refined_pairs[tid][j]); + if (distances.data_handle() != nullptr) { + distances(i, j) = DC::template postprocess(std::get<0>(refined_pairs[tid][j])); + } } } } diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 017defe390..ad7f327859 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -140,6 +140,16 @@ void 
GenerateRoundingErrorFreeDataset(const raft::resources& handle, GenerateRoundingErrorFreeDataset_kernel<<>>( ptr, size, resolution); } + +enum class graph_build_algo { + /* Use IVF-PQ to build all-neighbors knn graph */ + IVF_PQ, + /* Experimental, use NN-Descent to build all-neighbors knn graph */ + NN_DESCENT, + /* Choose default automatically */ + AUTO +}; + } // namespace struct AnnCagraInputs { @@ -158,18 +168,22 @@ struct AnnCagraInputs { bool include_serialized_dataset; // std::optional double min_recall; // = std::nullopt; - std::optional compression = std::nullopt; + std::optional ivf_pq_search_refine_ratio = std::nullopt; + std::optional compression = std::nullopt; }; inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraInputs& p) { std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"}; - std::vector build_algo = {"IVF_PQ", "NN_DESCENT"}; + std::vector build_algo = {"IVF_PQ", "NN_DESCENT", "AUTO"}; os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim << ", k=" << p.k << ", " << algo.at((int)p.algo) << ", max_queries=" << p.max_queries << ", itopk_size=" << p.itopk_size << ", search_width=" << p.search_width << ", metric=" << static_cast(p.metric) << (p.host_dataset ? ", host" : ", device") << ", build_algo=" << build_algo.at((int)p.build_algo); + if ((int)p.build_algo == 0 && p.ivf_pq_search_refine_ratio) { + os << "(refine_rate=" << *p.ivf_pq_search_refine_ratio << ')'; + } if (p.compression.has_value()) { auto vpq = p.compression.value(); os << ", pq_bits=" << vpq.pq_bits << ", pq_dim=" << vpq.pq_dim @@ -226,7 +240,26 @@ class AnnCagraTest : public ::testing::TestWithParam { cagra::index_params index_params; index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is // not used for knn_graph building. 
- index_params.build_algo = ps.build_algo; + switch (ps.build_algo) { + case graph_build_algo::IVF_PQ: + index_params.graph_build_params = + graph_build_params::ivf_pq_params(raft::matrix_extent(ps.n_rows, ps.dim)); + if (ps.ivf_pq_search_refine_ratio) { + std::get( + index_params.graph_build_params) + .refinement_rate = *ps.ivf_pq_search_refine_ratio; + } + break; + case graph_build_algo::NN_DESCENT: { + index_params.graph_build_params = + graph_build_params::nn_descent_params(index_params.intermediate_graph_degree); + break; + } + case graph_build_algo::AUTO: + // do nothing + break; + }; + index_params.compression = ps.compression; cagra::search_params search_params; search_params.algo = ps.algo; @@ -407,21 +440,21 @@ inline std::vector generate_inputs() {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - inputs2 = raft::util::itertools::product( - {100}, - {10000, 20000}, - {32}, - {10}, - {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT}, - {search_algo::AUTO}, - {10}, - {0}, // team_size - {64}, - {1}, - {cuvs::distance::DistanceType::L2Expanded}, - {false, true}, - {false}, - {0.995}); + inputs2 = + raft::util::itertools::product({100}, + {10000, 20000}, + {32}, + {10}, + {graph_build_algo::AUTO}, + {search_algo::AUTO}, + {10}, + {0}, // team_size + {64}, + {1}, + {cuvs::distance::DistanceType::L2Expanded}, + {false, true}, + {false}, + {0.985}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); // a few PQ configurations @@ -452,6 +485,25 @@ inline std::vector generate_inputs() } } + // refinement options + inputs2 = + raft::util::itertools::product({100}, + {5000}, + {32, 64}, + {16}, + {graph_build_algo::IVF_PQ}, + {search_algo::AUTO}, + {10}, + {0}, // team_size + {64}, + {1}, + {cuvs::distance::DistanceType::L2Expanded}, + {false, true}, + {false}, + {0.99}, + {1.0f, 2.0f, 3.0f}); + inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); + return inputs; }