-
Notifications
You must be signed in to change notification settings - Fork 184
Add support for PQ preprocessing API #1278
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 68 commits
cfe4f92
096daa5
244a9cd
9537eb1
2883a25
78dfd69
9dd0cfe
9c543fb
5471d9a
716fa58
6d6d4ca
746cac4
1950da4
75629e5
32c8912
d774999
a55df82
5a151f9
a0c5071
7112efe
407b500
51d9c94
37d9f7c
eea1421
a73809c
7bd8f17
3e8ef62
f5bf4ea
cebc548
f6b7829
f8fd16e
f04545a
487376c
03f8761
9ebca8a
a2c833b
bf68702
294db1c
746961d
cf6d482
6cdfa9d
67a5997
8d12d2d
ab0fa28
d4a46fa
377b908
b13f9f0
5f5791a
22e83e6
fe6753e
d05c85c
cc90c75
d8e6c84
c850df7
dcd8380
86465ff
9bfe19e
4730df6
0f17a0c
794e5b9
bf238fd
cb4780d
0f05512
b374931
c122228
06ddb89
b9675e8
d97f9b7
6b9126e
faa7659
05bf256
0d76786
e0805da
261a4b4
f0d8061
c56a8c4
2ddae5c
8052db3
c927dec
60ebe3f
f198537
4bb9435
2e13085
081254c
a69e2cf
6e77ab0
841a3a9
b95eb46
809bacb
3116fe3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,5 +1,5 @@ | ||||||||||
| /* | ||||||||||
| * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. | ||||||||||
| * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. | ||||||||||
| * SPDX-License-Identifier: Apache-2.0 | ||||||||||
| */ | ||||||||||
|
|
||||||||||
|
|
@@ -121,6 +121,11 @@ cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params); | |||||||||
| */ | ||||||||||
| cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params); | ||||||||||
|
|
||||||||||
| /** | ||||||||||
| * @brief Type of k-means algorithm. | ||||||||||
| * | ||||||||||
| * Enumerators: | ||||||||||
| * - `KMeans` (value 0) | ||||||||||
| * - `KMeansBalanced` (value 1) | ||||||||||
| * | ||||||||||
| * NOTE(review): the enumerator names carry no library prefix. C enumerators live in the | ||||||||||
| * ordinary identifier namespace, so `KMeans` and `KMeansBalanced` can collide with symbols | ||||||||||
| * in user code. Consider prefixing them before the names become part of the stable ABI. | ||||||||||
| */ | ||||||||||
| typedef enum { KMeans = 0, KMeansBalanced = 1 } cuvsKMeansType; | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This introduces two new symbols to the global namespace: `KMeans` and `KMeansBalanced`. Perhaps we should rename them to something with a library-specific prefix. (I realize that we haven't been doing this for most of our enums so far, but I think this is something that we should change.)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also - we are using a boolean parameter for this distinction in the k-means params (cuvs/c/include/cuvs/cluster/kmeans.h, lines 95 to 98 in 13fb586),
and I feel like we should either be using an enum for both or a bool for both, to be consistent.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree we should change these now before they get baked into the ABI and we have to wait ~6 months
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, I created #1717 to track that problem
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For the bool vs. enum question, I much prefer the enum, since it would make adding a third KMeans algorithm much easier. But I don't want to modify the kmeans codebase in this PR since its scope is already big enough. |
||||||||||
|
|
||||||||||
| /** | ||||||||||
| * @} | ||||||||||
| */ | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,220 @@ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <cuvs/cluster/kmeans.h> | ||
| #include <cuvs/core/c_api.h> | ||
| #include <dlpack/dlpack.h> | ||
| #include <stdint.h> | ||
|
|
||
| #ifdef __cplusplus | ||
| extern "C" { | ||
| #endif | ||
|
|
||
| /** | ||
| * @defgroup preprocessing_c_pq C API for Product Quantizer | ||
| * @{ | ||
| */ | ||
| /** | ||
| * @brief Product quantizer parameters. | ||
| * | ||
| * NOTE(review): the `bool` members below need `<stdbool.h>` when this header is compiled as C. | ||
| * It is not included directly in this header, so it is presumably provided by one of the cuvs | ||
| * headers included above — confirm. | ||
| */ | ||
| struct cuvsProductQuantizerParams { | ||
| /** | ||
| * The bit length of the vector element after compression by PQ. | ||
| * | ||
| * Possible values: within [4, 16]. | ||
| * | ||
| * Hint: the smaller the 'pq_bits', the smaller the index size and the better the search | ||
| * performance, but the lower the recall. | ||
| */ | ||
| uint32_t pq_bits; | ||
| /** | ||
| * The dimensionality of the vector after compression by PQ. | ||
| * When zero, an optimal value is selected using a heuristic. | ||
| * | ||
| * TODO: at the moment `dim` must be a multiple of `pq_dim`. | ||
| */ | ||
| uint32_t pq_dim; | ||
| /** | ||
| * Vector Quantization (VQ) codebook size - number of "coarse cluster centers". | ||
| * When zero, an optimal value is selected using a heuristic. | ||
| * When one, only product quantization is used. | ||
| * | ||
| * NOTE(review): how this field interacts with `use_vq` is not stated here — confirm which | ||
| * of the two takes precedence. | ||
| */ | ||
| uint32_t vq_n_centers; | ||
| /** The number of iterations searching for kmeans centers (both VQ & PQ phases). */ | ||
| uint32_t kmeans_n_iters; | ||
| /** | ||
| * The fraction of data to use during iterative kmeans building (VQ phase). | ||
| * When zero, an optimal value is selected using a heuristic. | ||
| */ | ||
| double vq_kmeans_trainset_fraction; | ||
| /** | ||
| * The fraction of data to use during iterative kmeans building (PQ phase). | ||
| * When zero, an optimal value is selected using a heuristic. | ||
| */ | ||
| double pq_kmeans_trainset_fraction; | ||
|
lowener marked this conversation as resolved.
Outdated
|
||
| /** | ||
| * The type of kmeans algorithm to use for PQ training. | ||
| */ | ||
| cuvsKMeansType pq_kmeans_type; | ||
| /** | ||
| * The max number of data points to use per PQ code during PQ codebook training. Using more data | ||
| * points per PQ code may increase the quality of PQ codebook but may also increase the build | ||
| * time. We will use `pq_n_centers * max_train_points_per_pq_code` training | ||
| * points to train each PQ codebook. | ||
| * | ||
| * NOTE(review): `pq_n_centers` is not a member of this struct; presumably it is the number of | ||
| * PQ codes derived from `pq_bits` — TODO confirm and define it here. | ||
| */ | ||
| uint32_t max_train_points_per_pq_code; | ||
| /** | ||
| * Whether to use Vector Quantization (KMeans) before product quantization (PQ). | ||
| * When true, VQ is used before PQ. When false, only product quantization is used. | ||
| */ | ||
| bool use_vq; | ||
| /** | ||
| * Whether to use subspaces for product quantization (PQ). | ||
| * When true, one PQ codebook is used for each subspace. Otherwise, a single | ||
| * PQ codebook is used. | ||
| */ | ||
| bool use_subspaces; | ||
| }; | ||
|
|
||
| typedef struct cuvsProductQuantizerParams* cuvsProductQuantizerParams_t; | ||
|
|
||
| /** | ||
| * @brief Allocate Product Quantizer params, and populate with default values | ||
| * | ||
| * The matching de-allocation call is cuvsProductQuantizerParamsDestroy(). | ||
| * | ||
| * @param[out] params pointer to the cuvsProductQuantizerParams_t to allocate | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerParamsCreate(cuvsProductQuantizerParams_t* params); | ||
|
|
||
| /** | ||
| * @brief De-allocate Product Quantizer params | ||
| * | ||
| * @param[in] params cuvsProductQuantizerParams_t to de-allocate | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerParamsDestroy(cuvsProductQuantizerParams_t params); | ||
|
|
||
| /** | ||
| * @brief Defines and stores a product quantizer upon training | ||
| * | ||
| * NOTE(review): this comment previously described the quantization as "a linear mapping of an | ||
| * interval in the float data type to the full range of the quantized int type". That wording | ||
| * describes scalar quantization rather than product quantization and looks like a leftover | ||
| * from another header — confirm and replace it with a PQ-specific description. | ||
| */ | ||
| typedef struct { | ||
| /** Integer-encoded pointer; presumably the address of the underlying quantizer object — TODO confirm. */ | ||
| uintptr_t addr; | ||
| /** DLPack data type descriptor; presumably the element type the quantizer was trained on — TODO confirm. */ | ||
| DLDataType dtype; | ||
| } cuvsProductQuantizer; | ||
|
|
||
| typedef cuvsProductQuantizer* cuvsProductQuantizer_t; | ||
|
|
||
| /** | ||
| * @brief Allocate Product Quantizer | ||
| * | ||
| * The matching de-allocation call is cuvsProductQuantizerDestroy(). | ||
| * | ||
| * @param[out] quantizer pointer to the cuvsProductQuantizer_t to allocate | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerCreate(cuvsProductQuantizer_t* quantizer); | ||
|
|
||
| /** | ||
| * @brief De-allocate Product Quantizer | ||
| * | ||
| * @param[in] quantizer cuvsProductQuantizer_t to de-allocate | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerDestroy(cuvsProductQuantizer_t quantizer); | ||
|
|
||
| /** | ||
| * @brief Trains a product quantizer to be used later for quantizing the dataset. | ||
| * | ||
| * @param[in] res raft resource | ||
| * @param[in] params Parameters for product quantizer training | ||
| * @param[in] dataset a row-major host or device matrix | ||
| * @param[out] quantizer trained product quantizer (presumably a handle obtained from | ||
| * cuvsProductQuantizerCreate() — TODO confirm) | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerTrain(cuvsResources_t res, | ||
| cuvsProductQuantizerParams_t params, | ||
| DLManagedTensor* dataset, | ||
| cuvsProductQuantizer_t quantizer); | ||
|
|
||
| /** | ||
| * @brief Applies product quantization transform to the given dataset | ||
| * | ||
| * @param[in] res raft resource | ||
| * @param[in] quantizer product quantizer | ||
| * @param[in] dataset a row-major host or device matrix to transform | ||
| * @param[out] out a row-major device matrix to store transformed data | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerTransform(cuvsResources_t res, | ||
| cuvsProductQuantizer_t quantizer, | ||
| DLManagedTensor* dataset, | ||
| DLManagedTensor* out); | ||
|
|
||
| /** | ||
| * @brief Applies product quantization inverse transform to the given quantized codes | ||
| * | ||
| * @param[in] res raft resource | ||
| * @param[in] quantizer product quantizer | ||
| * @param[in] codes a row-major device matrix of quantized codes | ||
| * @param[out] out a row-major device matrix to store the original data | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerInverseTransform(cuvsResources_t res, | ||
| cuvsProductQuantizer_t quantizer, | ||
| DLManagedTensor* codes, | ||
| DLManagedTensor* out); | ||
|
|
||
| /** | ||
| * @brief Get the bit length of the vector element after compression by PQ. | ||
| * | ||
| * @param[in] quantizer product quantizer | ||
| * @param[out] pq_bits bit length of the vector element after compression by PQ | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerGetPqBits(cuvsProductQuantizer_t quantizer, uint32_t* pq_bits); | ||
|
|
||
| /** | ||
| * @brief Get the dimensionality of the vector after compression by PQ. | ||
| * | ||
| * @param[in] quantizer product quantizer | ||
| * @param[out] pq_dim dimensionality of the vector after compression by PQ | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerGetPqDim(cuvsProductQuantizer_t quantizer, uint32_t* pq_dim); | ||
|
lowener marked this conversation as resolved.
|
||
|
|
||
| /** | ||
| * @brief Get the PQ codebook. | ||
| * | ||
| * NOTE(review): ownership and lifetime of the returned tensor data are not documented here — | ||
| * confirm whether it refers to memory owned by the quantizer. | ||
| * | ||
| * @param[in] quantizer product quantizer | ||
| * @param[out] pq_codebook PQ codebook | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerGetPqCodebook(cuvsProductQuantizer_t quantizer, | ||
| DLManagedTensor* pq_codebook); | ||
|
|
||
| /** | ||
| * @brief Get the VQ codebook. | ||
| * | ||
| * NOTE(review): ownership and lifetime of the returned tensor data are not documented here — | ||
| * confirm whether it refers to memory owned by the quantizer. | ||
| * | ||
| * @param[in] quantizer product quantizer | ||
| * @param[out] vq_codebook VQ codebook | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerGetVqCodebook(cuvsProductQuantizer_t quantizer, | ||
| DLManagedTensor* vq_codebook); | ||
| /** | ||
| * @brief Get the encoded dimension of the quantized dataset. | ||
| * | ||
| * @param[in] quantizer product quantizer | ||
| * @param[out] encoded_dim encoded dimension of the quantized dataset | ||
| * @return cuvsError_t | ||
| */ | ||
| cuvsError_t cuvsProductQuantizerGetEncodedDim(cuvsProductQuantizer_t quantizer, | ||
| uint32_t* encoded_dim); | ||
|
|
||
| /** | ||
| * @} | ||
| */ | ||
| #ifdef __cplusplus | ||
| } | ||
| #endif | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Future task - we should update
`cuvsKMeansParams_t` to use this and remove the `bool hierarchical` flag. Can you create an issue for this just so we don't forget it? It doesn't have to be done now, especially since we're striving to maintain ABI compatibility in our C APIs (we need to start deprecating breaking changes).