diff --git a/c/include/cuvs/neighbors/ivf_pq.h b/c/include/cuvs/neighbors/ivf_pq.h index 8d66662a09..5b95dfe7c5 100644 --- a/c/include/cuvs/neighbors/ivf_pq.h +++ b/c/include/cuvs/neighbors/ivf_pq.h @@ -23,9 +23,18 @@ extern "C" { * @brief A type for specifying how PQ codebooks are created * */ -enum codebook_gen { // NOLINT - PER_SUBSPACE = 0, // NOLINT - PER_CLUSTER = 1, // NOLINT +enum cuvsIvfPqCodebookGen { + CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE = 0, + CUVS_IVF_PQ_CODEBOOK_GEN_PER_CLUSTER = 1, +}; + +/** + * @brief A type for specifying the memory layout of IVF-PQ list data + * + */ +enum cuvsIvfPqListLayout { + CUVS_IVF_PQ_LIST_LAYOUT_FLAT = 0, + CUVS_IVF_PQ_LIST_LAYOUT_INTERLEAVED = 1, }; /** @@ -80,7 +89,7 @@ struct cuvsIvfPqIndexParams { */ uint32_t pq_dim; /** How PQ codebooks are created. */ - enum codebook_gen codebook_kind; + enum cuvsIvfPqCodebookGen codebook_kind; /** * Apply a random rotation matrix on the input data and queries even if `dim % pq_dim == 0`. * @@ -114,6 +123,14 @@ struct cuvsIvfPqIndexParams { * points to train each codebook. */ uint32_t max_train_points_per_pq_code; + /** + * Memory layout of the IVF-PQ list data. + * + * - CUVS_IVF_PQ_LIST_LAYOUT_FLAT: Codes are stored contiguously, one vector's codes after another. + * - CUVS_IVF_PQ_LIST_LAYOUT_INTERLEAVED: Codes are interleaved for optimized search performance. + * This is the default and recommended for search workloads. + */ + enum cuvsIvfPqListLayout codes_layout; }; typedef struct cuvsIvfPqIndexParams* cuvsIvfPqIndexParams_t; @@ -294,8 +311,8 @@ cuvsError_t cuvsIvfPqIndexGetCentersPadded(cuvsIvfPqIndex_t index, DLManagedTens /** * @brief Get the PQ cluster centers * - * - codebook_gen::PER_SUBSPACE: [pq_dim , pq_len, pq_book_size] - * - codebook_gen::PER_CLUSTER: [n_lists, pq_len, pq_book_size] + * - CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE: [pq_dim , pq_len, pq_book_size] + * - CUVS_IVF_PQ_CODEBOOK_GEN_PER_CLUSTER: [n_lists, pq_len, pq_book_size] * * @param[in] index cuvsIvfPqIndex_t Built Ivf-Pq index * @param[out] pq_centers Output tensor that will be populated with a non-owning view of the data @@ -443,8 +460,8 @@ cuvsError_t cuvsIvfPqBuild(cuvsResources_t res, * matrices) * @param[in] dim dimensionality of the input data * @param[in] pq_centers PQ codebook on device memory with required shape: - * - codebook_kind PER_SUBSPACE: [pq_dim, pq_len, pq_book_size] - * - codebook_kind PER_CLUSTER: [n_lists, pq_len, pq_book_size] + * - codebook_kind CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE: [pq_dim, pq_len, pq_book_size] + * - codebook_kind CUVS_IVF_PQ_CODEBOOK_GEN_PER_CLUSTER: [n_lists, pq_len, pq_book_size] * @param[in] centers Cluster centers in the original space [n_lists, dim_ext] * where dim_ext = round_up(dim + 1, 8) * @param[in] centers_rot Rotated cluster centers [n_lists, rot_dim] diff --git a/c/src/neighbors/cagra.cpp b/c/src/neighbors/cagra.cpp index 611ef3e086..e344aed415 100644 --- a/c/src/neighbors/cagra.cpp +++ b/c/src/neighbors/cagra.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -365,7 +365,7 @@ static void _populate_c_ivf_pq_params(cuvsIvfPqParams* c_ivf_pq, c_ivf_pq->ivf_pq_build_params->kmeans_trainset_fraction = bp.kmeans_trainset_fraction; c_ivf_pq->ivf_pq_build_params->pq_bits = bp.pq_bits; c_ivf_pq->ivf_pq_build_params->pq_dim = bp.pq_dim; - c_ivf_pq->ivf_pq_build_params->codebook_kind = static_cast(bp.codebook_kind); + c_ivf_pq->ivf_pq_build_params->codebook_kind = static_cast(bp.codebook_kind); c_ivf_pq->ivf_pq_build_params->force_random_rotation = bp.force_random_rotation; c_ivf_pq->ivf_pq_build_params->conservative_memory_allocation = bp.conservative_memory_allocation; c_ivf_pq->ivf_pq_build_params->max_train_points_per_pq_code = bp.max_train_points_per_pq_code; diff --git a/c/src/neighbors/ivf_pq.cpp b/c/src/neighbors/ivf_pq.cpp index f99180697a..4ffff4bfaa 100644 --- a/c/src/neighbors/ivf_pq.cpp +++ b/c/src/neighbors/ivf_pq.cpp @@ -34,6 +34,7 @@ void convert_c_index_params(cuvsIvfPqIndexParams params, cuvs::neighbors::ivf_pq out->force_random_rotation = params.force_random_rotation; out->conservative_memory_allocation = params.conservative_memory_allocation; out->max_train_points_per_pq_code = params.max_train_points_per_pq_code; + out->codes_layout = static_cast((int)params.codes_layout); } void convert_c_search_params(cuvsIvfPqSearchParams params, cuvs::neighbors::ivf_pq::search_params* out) @@ -218,8 +219,16 @@ void _get_list_indices(cuvsIvfPqIndex index, uint32_t label, DLManagedTensor* out_labels) { - auto index_ptr = reinterpret_cast*>(index.addr); - cuvs::core::to_dlpack(index_ptr->lists()[label]->indices.view(), out_labels); + auto index_ptr = reinterpret_cast*>(index.addr); + if (index_ptr->codes_layout() == cuvs::neighbors::ivf_pq::list_layout::FLAT) { + auto& list = + static_cast&>(*index_ptr->lists()[label]); + cuvs::core::to_dlpack(list.indices.view(), out_labels); + } else { + auto& list = static_cast&>( + *index_ptr->lists()[label]); + cuvs::core::to_dlpack(list.indices.view(), out_labels); + } } } // namespace @@ -325,10 +334,11 @@ extern "C" cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t* params .kmeans_trainset_fraction = 0.5, .pq_bits = 8, .pq_dim = 0, - .codebook_kind = codebook_gen::PER_SUBSPACE, + .codebook_kind = CUVS_IVF_PQ_CODEBOOK_GEN_PER_SUBSPACE, .force_random_rotation = false, .conservative_memory_allocation = false, - .max_train_points_per_pq_code = 256}; + .max_train_points_per_pq_code = 256, + .codes_layout = CUVS_IVF_PQ_LIST_LAYOUT_INTERLEAVED}; }); } diff --git a/cpp/cmake/patches/faiss-1.13-cuvs-26.02.diff b/cpp/cmake/patches/faiss-1.13-cuvs-26.02.diff index 1e4443416a..50199e0975 100644 --- a/cpp/cmake/patches/faiss-1.13-cuvs-26.02.diff +++ b/cpp/cmake/patches/faiss-1.13-cuvs-26.02.diff @@ -1,5 +1,5 @@ diff --git a/faiss/gpu/impl/CuvsIVFPQ.cu b/faiss/gpu/impl/CuvsIVFPQ.cu -index 1e2fef225..35b388147 100644 +index 1e2fef225..2ee40da46 100644 --- a/faiss/gpu/impl/CuvsIVFPQ.cu +++ b/faiss/gpu/impl/CuvsIVFPQ.cu @@ -129,8 +129,14 @@ void CuvsIVFPQ::updateQuantizer(Index* quantizer) { @@ -122,7 +122,35 @@ index 1e2fef225..35b388147 100644 } setPQCentroids_(); -@@ -520,7 +583,7 @@ void CuvsIVFPQ::setPQCentroids_() { +@@ -404,10 +467,11 @@ void CuvsIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { + auto& cuvs_index_lists = cuvs_index->lists(); + + // conservative memory alloc for cloning cpu inverted lists +- cuvs::neighbors::ivf_pq::list_spec ivf_list_spec{ +- static_cast(bitsPerSubQuantizer_), +- static_cast(numSubQuantizers_), +- true}; ++ cuvs::neighbors::ivf_pq::list_spec_interleaved ++ ivf_list_spec{ ++ static_cast(bitsPerSubQuantizer_), ++ static_cast(numSubQuantizers_), ++ true}; + + for (size_t i = 0; i < nlist; ++i) { + size_t listSize = ivf->list_size(i); +@@ -426,9 +490,9 @@ void CuvsIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { + // This cuVS list must currently be empty + FAISS_ASSERT(getListLength(i) == 0); + +- cuvs::neighbors::ivf::resize_list( ++ cuvs::neighbors::ivf_pq::helpers::resize_list( + raft_handle, +- cuvs_index_lists[i], ++ cuvs_index_lists[i], + ivf_list_spec, + static_cast(listSize), + static_cast(0)); +@@ -520,7 +587,7 @@ void CuvsIVFPQ::setPQCentroids_() { auto stream = resources_->getDefaultStreamCurrentDevice(); raft::copy( diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 4f697b3604..0d23989a33 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -683,11 +683,52 @@ template constexpr static IdxT kInvalidRecord = (std::is_signed_v ? IdxT{0} : std::numeric_limits::max()) - 1; +/** + * Abstract base class for IVF list data. + * This allows polymorphic access to list data regardless of the underlying layout. + * + * @tparam ValueT The data element type (e.g., uint8_t for PQ codes, float for raw vectors) + * @tparam IdxT The index type for source indices + * @tparam SizeT The size type + * + * TODO: Make this struct internal (tracking issue: https://github.com/rapidsai/cuvs/issues/1726) + */ +template +struct list_base { + using value_type = ValueT; + using index_type = IdxT; + using size_type = SizeT; + + virtual ~list_base() = default; + + /** Get the raw data pointer. */ + virtual value_type* data_ptr() noexcept = 0; + virtual const value_type* data_ptr() const noexcept = 0; + + /** Get the indices pointer. */ + virtual index_type* indices_ptr() noexcept = 0; + virtual const index_type* indices_ptr() const noexcept = 0; + + /** Get the current size (number of records). */ + virtual size_type get_size() const noexcept = 0; + + /** Set the current size (number of records). */ + virtual void set_size(size_type new_size) noexcept = 0; + + /** Get the total size of the data array in bytes. */ + virtual size_t data_byte_size() const noexcept = 0; + + /** Get the capacity (number of indices that can be stored). */ + virtual size_type indices_capacity() const noexcept = 0; +}; + /** The data for a single IVF list. */ template