Skip to content
Merged
Show file tree
Hide file tree
Changes from 109 commits
Commits
Show all changes
142 commits
Select commit Hold shift + click to select a range
5e11ce6
Integrate @anaruse's ACE method for large graphs
julianmi Sep 19, 2025
2d64ff3
ACE: Clarify partition naming
julianmi Sep 22, 2025
911c0a8
ACE: Enhance graph build parameters selection
julianmi Sep 23, 2025
4a779a4
ACE: Implement merging of small partitions
julianmi Sep 23, 2025
9f5d31c
ACE: Update parameters to clarify ace method usage
julianmi Sep 23, 2025
5552873
ACE: Add timinings
julianmi Sep 23, 2025
68a0ad8
ACE: Remove unused vector_fwd_list_1 in build_ace
julianmi Sep 24, 2025
0e86d18
ACE: Check if we have enough host memory
julianmi Sep 24, 2025
12b0366
ACE: Restructure parameter setting
julianmi Sep 24, 2025
b0f1d04
ACE: Restructure small partition merging
julianmi Sep 24, 2025
e25375f
ACE: Refactor partition data gathering
julianmi Sep 24, 2025
434bb4d
ACE: Refactor forward backward list creation
julianmi Sep 25, 2025
3b1010f
ACE: Refactor id adjusting of sub search graph
julianmi Sep 25, 2025
31431ba
ACE: Refactor id adjusting of final search graph
julianmi Sep 25, 2025
1c53df2
ACE: Refactor partition label handling and dataset storage
julianmi Sep 26, 2025
128031b
ACE: Improve file I/O speeds
julianmi Sep 26, 2025
8efde31
ACE: Reduce logging
julianmi Sep 29, 2025
eecb1a4
ACE: Fix issue in main loop logging
julianmi Sep 29, 2025
f9fc127
ACE: Store backward mapping for HNSW
julianmi Sep 29, 2025
307ee40
ACE: Introduce RAII wrapper for file descriptors to enhance file mana…
julianmi Sep 30, 2025
d77adf5
ACE: Enhance partition labeling with balanced K-means and small parti…
julianmi Oct 1, 2025
c036a78
ACE: Minor code improvements
julianmi Oct 2, 2025
1405c25
ACE: Add disk storage management to index structure
julianmi Oct 2, 2025
5056eca
ACE: Formatting
julianmi Oct 2, 2025
b2819f4
Move eigen_solvers from raft (#1402)
aamijar Oct 2, 2025
b23c08f
CosineExpanded Distance Metric for CAGRA (#197)
tarang-jain Oct 3, 2025
19eb437
Merge remote-tracking branch 'upstream/branch-25.12' into ace-disk
julianmi Oct 6, 2025
3101c5b
Fix merge conflict
julianmi Oct 6, 2025
f7ba0b1
ACE: Adjust reorder buffers to available memory
julianmi Oct 6, 2025
1831c97
ACE: Store dimensions in CAGRA index when disk is used
julianmi Oct 6, 2025
3f4b070
add build_knn_graph to API
mfoerste4 Oct 6, 2025
4fea8de
add serialize_to_hnsw
mfoerste4 Oct 6, 2025
f5ed9ac
connect components, minor fixes
mfoerste4 Oct 6, 2025
6941c94
remove bad check
mfoerste4 Oct 6, 2025
a792de0
ACE: Explain partitioned build.
julianmi Oct 7, 2025
5b19097
ACE: Document added CAGRA parameters
julianmi Oct 7, 2025
b19a4c1
ACE: Integrate build_ace into build
julianmi Oct 7, 2025
2c156ab
ACE: Reuse kmeans code
julianmi Oct 7, 2025
38a76ad
ACE: Add min_samples_per_partition constant
julianmi Oct 7, 2025
c815679
ACE: Address review feedback
julianmi Oct 7, 2025
3b06f85
ACE: Remove redundant checks of intermediate_degree in build_ace
julianmi Oct 7, 2025
0848d63
missing include
mfoerste4 Oct 7, 2025
e2046ca
ACE: Move helpers into src/utils
julianmi Oct 8, 2025
04e72bb
ACE: Reuse write_large_file in buffer flushing
julianmi Oct 8, 2025
2647728
fix wrong type conversion
mfoerste4 Oct 9, 2025
66fa4db
ACE: Use host view instead of mdspan
julianmi Oct 9, 2025
17a3d81
ACE: Improve memory heuristic
julianmi Oct 9, 2025
b33df08
Update Changelog [skip ci]
AyodeAwe Oct 8, 2025
a6c76bd
Deallocation should be noexcept (#1416)
bdice Oct 9, 2025
84fb605
ACE: Lower the buffer sizes due to OOM
julianmi Oct 10, 2025
a970edc
ANN_BENCH: Don't throw in noexcept do_deallocate (#1417)
achirkin Oct 9, 2025
6b3e1ae
Using `all_neighbors` for mutual reachability (#1234)
jinsolp Oct 10, 2025
85f2a23
ACE: Factor out graph degree checks
julianmi Oct 13, 2025
374a8f7
ACE: Remove duplicate zeroing of first offsets
julianmi Oct 13, 2025
47f9482
ACE: Use templated index type consistently
julianmi Oct 13, 2025
334dac9
ACE: Remove small parition merging
julianmi Oct 13, 2025
fba25b5
Revert "ACE: Use templated index type consistently"
julianmi Oct 14, 2025
b412ae5
ACE: Refactor partition label handling and ID mappings
julianmi Oct 14, 2025
a76180c
Merge branch 'branch-25.12' into ace-disk
julianmi Oct 14, 2025
b6e1539
ACE: Limit the k-means samples
julianmi Oct 14, 2025
bbe872a
ACE: Minor improvements based on feedback
julianmi Oct 14, 2025
487a3a1
ACE: Move build dir check
julianmi Oct 15, 2025
aa4f20a
ACE: Add ace_ef_construction parameter for index quality control
julianmi Oct 15, 2025
abe1b4d
ACE: Introduce ACE build method parameter
julianmi Oct 15, 2025
df350f0
fix assert comment
mfoerste4 Oct 15, 2025
18df7c1
fix merge conflict
mfoerste4 Oct 15, 2025
a4315cf
ACE: Fix overflow in byte offset calculation
julianmi Oct 16, 2025
cdb8077
Merge branch 'branch-25.12' into ace-disk
julianmi Oct 16, 2025
56ef910
support hierarchy::none in from_cagra disk-index
mfoerste4 Oct 16, 2025
6e20374
ACE: Add missing c interfaces
julianmi Oct 16, 2025
097f78c
ACE: Clean up augmented file
julianmi Oct 16, 2025
7e74a1c
properly release mmap
mfoerste4 Oct 16, 2025
b0b0c24
ACE: Adress review feedback
julianmi Oct 16, 2025
19589b8
buffer ofstream
mfoerste4 Oct 16, 2025
f49ce3d
ACE: Further improvements based on feedback
julianmi Oct 17, 2025
bead16d
ACE: Add example
julianmi Oct 17, 2025
9459fc3
ACE: Improve Java and Python interfaces
julianmi Oct 20, 2025
201c3b5
Merge remote-tracking branch 'julian/ace-disk' into ace_serialize
mfoerste4 Oct 20, 2025
c9b39b4
Merge branch 'branch-25.12' into ace-disk
julianmi Oct 20, 2025
bd4742e
Merge remote-tracking branch 'julian/ace-disk' into ace_serialize
mfoerste4 Oct 20, 2025
11fb018
ACE: Switch to host sampling for clustering
julianmi Oct 21, 2025
604da38
ACE: Fix build dir check
julianmi Oct 21, 2025
db68380
ACE: Clarify memory limit messaging
julianmi Oct 21, 2025
b801927
ACE: Ensure minimum report and log intervals in partitioning functions
julianmi Oct 21, 2025
40752f9
ACE: Add CAGRA ACE unit tests
julianmi Oct 21, 2025
7fa3fbb
ACE: Rename primary to core partition
julianmi Oct 22, 2025
17d780d
Merge remote-tracking branch 'mfoerste4/ace_serialize' into ace-disk
julianmi Oct 22, 2025
dcee956
Merge branch 'main' into ace-disk
julianmi Oct 22, 2025
daa6a45
Fix uninitialized cagra_ace_build_
julianmi Oct 22, 2025
46e5ab2
Fix build_knn_graph documentation
julianmi Oct 22, 2025
30d9f69
Merge branch 'main' into ace-disk
julianmi Oct 23, 2025
5d469e5
Merge remote-tracking branch 'origin/ace-disk' into ace-disk
julianmi Oct 23, 2025
83c5828
ACE: Add separate timer for relabeling
julianmi Oct 23, 2025
17d85eb
ACE: Improve serialization checks and optimize memory handling
julianmi Oct 26, 2025
4af2164
ACE: Add missing C interfaces
julianmi Oct 27, 2025
865f6e0
ACE: Use RAFT_EXPECTS instead of ASSERT
julianmi Oct 27, 2025
4f4037c
ACE: Add missing C tests
julianmi Oct 28, 2025
05c3409
ACE: Minor logging improvements
julianmi Oct 28, 2025
d409ead
fix odd graph degree
mfoerste4 Oct 28, 2025
63e6e83
ACE: Switch to NumPy file format
julianmi Oct 28, 2025
f3b9286
Merge remote-tracking branch 'upstream/main' into ace-disk
julianmi Oct 28, 2025
32081df
ACE: Use SPDX licensing
julianmi Oct 28, 2025
01713bf
merge conflicts
mfoerste4 Oct 28, 2025
0248a4f
Merge branch 'ace-disk' of https://github.com/julianmi/cuvs into ace-…
mfoerste4 Oct 28, 2025
fe32571
Merge branch 'main' into ace-disk
julianmi Oct 29, 2025
15f547d
ACE: Drop ace prefix from parameters
julianmi Oct 29, 2025
fdaa534
ACE: Rename ACE example and use HNSW search
julianmi Oct 29, 2025
7e15b0a
ACE: Minor improvements
julianmi Oct 30, 2025
8074bfe
Merge branch 'main' into ace-disk
julianmi Oct 30, 2025
1637c25
ACE: Address review comments
julianmi Nov 1, 2025
5c6a2a8
Merge branch 'main' into ace-disk
julianmi Nov 1, 2025
14418c1
ACE: Use RAFT_EXPECTS
julianmi Nov 3, 2025
d4d18cc
Merge remote-tracking branch 'upstream/main' into ace-disk
julianmi Nov 3, 2025
89eac01
ACE: Remove outdated Java and Python interfaces
julianmi Nov 3, 2025
bb03ce0
ACE: Better explain parameters
julianmi Nov 4, 2025
640fa09
ACE: Support ACE build method in benchmarks
julianmi Nov 4, 2025
5961659
ACE: Add Python interface and tests
julianmi Nov 4, 2025
2f8f65e
Merge branch 'main' into ace-disk
julianmi Nov 4, 2025
9e843d7
ACE: Fix python docstring
julianmi Nov 5, 2025
69bca47
ACE: Add Java interface and tests
julianmi Nov 6, 2025
cc6b4b1
ACE: Improve comments explaining the approach
julianmi Nov 6, 2025
0766756
Merge branch 'main' into ace-disk
julianmi Nov 6, 2025
836bb55
ACE: Fix Java docstring
julianmi Nov 6, 2025
355150f
Merge branch 'main' into ace-disk
julianmi Nov 7, 2025
d0392f4
ACE: Improve example
julianmi Nov 7, 2025
d607a7d
Merge branch 'main' into ace-disk
julianmi Nov 8, 2025
05908bb
ACE: Refactor CAGRA and HNSW index handling for disk storage
julianmi Nov 10, 2025
b845653
Merge branch 'main' into ace-disk
julianmi Nov 10, 2025
faaa492
ACE: Remove ace_set_index_params
julianmi Nov 10, 2025
48d7f2a
Merge branch 'main' into ace-disk
julianmi Nov 11, 2025
ec097d1
Merge branch 'main' into ace-disk
julianmi Nov 11, 2025
b3b5ddb
ACE: Move file_io and host_memory headers
julianmi Nov 12, 2025
5e7bd00
ACE: Add checks for indices on disk
julianmi Nov 12, 2025
756d8fb
ACE: Minor improvements
julianmi Nov 12, 2025
e4c440a
ACE: Use use_disk_mode instead of use_disk
julianmi Nov 12, 2025
235710c
ACE: Align graph degree in Java test
julianmi Nov 12, 2025
d4f7f42
Merge branch 'main' into ace-disk
julianmi Nov 12, 2025
4df5f6e
ACE: Improve file descriptor
julianmi Nov 12, 2025
105b208
ACE: Remove on_disk()
julianmi Nov 12, 2025
d2eb731
Merge branch 'main' into ace-disk
julianmi Nov 13, 2025
4461637
ACE: Move helpers into their own compilation unit
julianmi Nov 13, 2025
e290130
ACE: Use disk-mode in example
julianmi Nov 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 73 additions & 3 deletions c/include/cuvs/neighbors/cagra.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ enum cuvsCagraGraphBuildAlgo {
/* Experimental, use NN-Descent to build all-neighbors knn graph */
NN_DESCENT = 2,
/* Experimental, use iterative cagra search and optimize to build the knn graph */
ITERATIVE_CAGRA_SEARCH = 3
ITERATIVE_CAGRA_SEARCH = 3,
/* Use ACE (Augmented Core Extraction) to build the graph */
Comment thread
julianmi marked this conversation as resolved.
Outdated
ACE = 4
};

/** Parameters for VPQ compression. */
Expand Down Expand Up @@ -84,6 +86,36 @@ struct cuvsIvfPqParams {

typedef struct cuvsIvfPqParams* cuvsIvfPqParams_t;

/** Parameters for ACE (Augmented Core Extraction) graph build */
Comment thread
julianmi marked this conversation as resolved.
Outdated
struct cuvsAceParams {
  /**
   * Number of partitions for ACE (Augmented Core Extraction) partitioned build.
   * Small values might improve recall but potentially degrade performance and
   * increase memory usage. Partitions should not be too small to prevent issues
   * in KNN graph construction. 100k - 5M vectors per partition is recommended
   * depending on the available host and GPU memory.
   */
  size_t npartitions;
  /**
   * The index quality for the ACE build.
   * Bigger values increase the index quality. At some point, increasing this will no longer
   * improve the quality.
   */
  size_t ef_construction;
  /**
   * Directory to store ACE build artifacts (e.g., KNN graph, optimized graph).
   * Used when `npartitions` > 1 or `use_disk` is true.
   *
   * Ownership: cuvsAceParamsDestroy and cuvsCagraIndexParamsDestroy release this string with
   * free(), so a value assigned here must come from malloc()/strdup() (or be NULL), never a
   * string literal or stack buffer.
   */
  const char* build_dir;
  /**
   * Whether to use disk-based storage for ACE build.
   * When true, enables disk-based operations for memory-efficient graph construction.
   */
  bool use_disk;
};

typedef struct cuvsAceParams* cuvsAceParams_t;

/**
* @brief Supplemental parameters to build CAGRA Index
*
Expand All @@ -106,9 +138,12 @@ struct cuvsCagraIndexParams {
*/
cuvsCagraCompressionParams_t compression;
/**
* Optional: specify ivf pq params when `build_algo = IVF_PQ`
* Optional: specify graph build params based on build_algo
* - IVF_PQ: cuvsIvfPqParams_t
* - ACE: cuvsAceParams_t
* - Others: nullptr
*/
cuvsIvfPqParams_t graph_build_params;
void* graph_build_params;
};

typedef struct cuvsCagraIndexParams* cuvsCagraIndexParams_t;
Expand Down Expand Up @@ -145,6 +180,22 @@ cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionParams_t* param
*/
cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionParams_t params);

/**
* @brief Allocate ACE params, and populate with default values
*
* @param[out] params receives the newly allocated cuvsAceParams_t
* @return cuvsError_t
*/
cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params);
Comment thread
julianmi marked this conversation as resolved.

/**
* @brief De-allocate ACE params
*
* @param[in] params
* @return cuvsError_t
*/
cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params);
Comment thread
robertmaynard marked this conversation as resolved.

/**
* @}
*/
Expand Down Expand Up @@ -365,6 +416,25 @@ cuvsError_t cuvsCagraIndexGetSize(cuvsCagraIndex_t index, int64_t* size);
*/
cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, int64_t* graph_degree);

/**
* @brief Check if the CAGRA index is stored on disk
*
* @param[in] index CAGRA index
* @param[out] on_disk return true if index is on disk, false otherwise
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexIsOnDisk(cuvsCagraIndex_t index, bool* on_disk);

/**
* @brief Get the file directory where the CAGRA index is stored (if on disk)
Comment thread
julianmi marked this conversation as resolved.
Outdated
*
* @param[in] index CAGRA index
* @param[out] file_directory newly allocated, NUL-terminated directory path; the caller must release it with free()
* @param[out] length length of the file_directory string in bytes, excluding the terminating NUL
* @return cuvsError_t
*/
cuvsError_t cuvsCagraIndexGetFileDirectory(cuvsCagraIndex_t index, char** file_directory, size_t* length);

/**
* @brief Returns a view of the CAGRA dataset
*
Expand Down
84 changes: 83 additions & 1 deletion c/src/neighbors/cagra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/

#include <cstdint>
#include <cstring>
#include <dlpack/dlpack.h>

#include <raft/core/error.hpp>
Expand All @@ -15,6 +16,7 @@
#include <cuvs/neighbors/cagra.h>
#include <cuvs/neighbors/common.h>
#include <cuvs/neighbors/cagra.hpp>
#include <cuvs/neighbors/graph_build_types.hpp>

#include "../core/exceptions.hpp"
#include "../core/interop.hpp"
Expand All @@ -28,6 +30,7 @@ static void _set_graph_build_params(
std::variant<std::monostate,
cuvs::neighbors::cagra::graph_build_params::ivf_pq_params,
cuvs::neighbors::cagra::graph_build_params::nn_descent_params,
cuvs::neighbors::cagra::graph_build_params::ace_params,
cuvs::neighbors::cagra::graph_build_params::iterative_search_params>& out_params,
cuvsCagraIndexParams& params,
cuvsCagraGraphBuildAlgo algo,
Expand Down Expand Up @@ -79,6 +82,18 @@ static void _set_graph_build_params(
out_params = nn_params;
break;
}
case cuvsCagraGraphBuildAlgo::ACE: {
cuvs::neighbors::cagra::graph_build_params::ace_params ace_p;
if (params.graph_build_params) {
auto ace_params_c = static_cast<cuvsAceParams*>(params.graph_build_params);
ace_p.npartitions = ace_params_c->npartitions;
ace_p.ef_construction = ace_params_c->ef_construction;
ace_p.build_dir = std::string(ace_params_c->build_dir);
ace_p.use_disk = ace_params_c->use_disk;
}
out_params = ace_p;
break;
}
case cuvsCagraGraphBuildAlgo::ITERATIVE_CAGRA_SEARCH: {
cuvs::neighbors::cagra::graph_build_params::iterative_search_params p;
out_params = p;
Expand Down Expand Up @@ -432,6 +447,28 @@ extern "C" cuvsError_t cuvsCagraIndexGetGraphDegree(cuvsCagraIndex_t index, int6
});
}

/**
 * C API entry point: stores the result of the index's `on_disk()` accessor in `*on_disk`.
 *
 * The opaque handle address is reinterpreted as a `cagra::index<float, uint32_t>`.
 * NOTE(review): presumably `on_disk()` does not depend on the element type, which is why no
 * dtype dispatch happens here - confirm against the index class.
 */
extern "C" cuvsError_t cuvsCagraIndexIsOnDisk(cuvsCagraIndex_t index, bool* on_disk)
{
  using index_type = cuvs::neighbors::cagra::index<float, uint32_t>;

  return cuvs::core::translate_exceptions([=] {
    auto* cagra_index         = reinterpret_cast<index_type*>(index->addr);
    const bool stored_on_disk = cagra_index->on_disk();
    *on_disk                  = stored_on_disk;
  });
}

/**
 * C API entry point: returns a heap-allocated copy of the index's file directory string.
 *
 * On success `*file_directory` points to a malloc'ed, NUL-terminated copy of the string
 * returned by the index's `file_directory()` accessor and `*length` holds its length without
 * the terminator. The caller owns the buffer and must release it with free().
 *
 * Both output pointers and the allocation are validated with RAFT_EXPECTS; the outputs are
 * written only after the copy succeeded, so a failure leaves them untouched.
 */
extern "C" cuvsError_t cuvsCagraIndexGetFileDirectory(cuvsCagraIndex_t index,
                                                      char** file_directory,
                                                      size_t* length)
{
  return cuvs::core::translate_exceptions([=] {
    RAFT_EXPECTS(file_directory != nullptr, "file_directory output pointer must not be null");
    RAFT_EXPECTS(length != nullptr, "length output pointer must not be null");

    auto index_ptr = reinterpret_cast<cuvs::neighbors::cagra::index<float, uint32_t>*>(index->addr);
    const auto& dir         = index_ptr->file_directory();
    const size_t dir_length = dir.length();

    // +1 for the terminating NUL, which c_str() guarantees to be present at dir[dir_length].
    auto* buffer = static_cast<char*>(malloc(dir_length + 1));
    RAFT_EXPECTS(buffer != nullptr, "Failed to allocate memory for the file directory string");
    std::memcpy(buffer, dir.c_str(), dir_length + 1);

    *file_directory = buffer;
    *length         = dir_length;
  });
}

extern "C" cuvsError_t cuvsCagraIndexGetDataset(cuvsCagraIndex_t index, DLManagedTensor* dataset)
{
return cuvs::core::translate_exceptions([=] {
Expand Down Expand Up @@ -641,7 +678,26 @@ extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params
/**
 * C API entry point: releases a cuvsCagraIndexParams object and the graph build parameters
 * attached to it.
 *
 * `graph_build_params` is an untyped `void*`, so it must be cast back to its concrete struct
 * (selected by `build_algo`) before being deleted; deleting through `void*` is undefined
 * behavior. A null `params` is accepted and ignored.
 */
extern "C" cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params)
{
  return cuvs::core::translate_exceptions([=] {
    if (params == nullptr) { return; }

    // Delete graph_build_params based on the build algorithm type
    if (params->graph_build_params != nullptr) {
      switch (params->build_algo) {
        case cuvsCagraGraphBuildAlgo::IVF_PQ:
          delete static_cast<cuvsIvfPqParams*>(params->graph_build_params);
          break;
        case cuvsCagraGraphBuildAlgo::ACE: {
          auto ace_params = static_cast<cuvsAceParams*>(params->graph_build_params);
          // The build directory string is malloc-allocated (see strdup in
          // cuvsAceParamsCreate); free(nullptr) is a no-op, so no extra check is needed.
          free(const_cast<char*>(ace_params->build_dir));
          delete ace_params;
          break;
        }
        case cuvsCagraGraphBuildAlgo::AUTO_SELECT:
        case cuvsCagraGraphBuildAlgo::NN_DESCENT:
        case cuvsCagraGraphBuildAlgo::ITERATIVE_CAGRA_SEARCH:
          // These algorithms don't have separate parameter structs
          break;
      }
    }
    delete params;
  });
}
Expand All @@ -665,6 +721,32 @@ extern "C" cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionPar
return cuvs::core::translate_exceptions([=] { delete params; });
}

/**
 * C API entry point: allocates a cuvsAceParams object populated from a default-constructed
 * C++ `graph_build_params::ace_params`.
 *
 * `build_dir` is duplicated with strdup so that the C struct owns its own copy; it is released
 * with free() by cuvsAceParamsDestroy. The struct is allocated before the string so that a
 * failing `new` cannot leak the duplicated string, and a failing strdup is reported instead of
 * handing out a params object with a null `build_dir`.
 */
extern "C" cuvsError_t cuvsAceParamsCreate(cuvsAceParams_t* params)
{
  return cuvs::core::translate_exceptions([=] {
    RAFT_EXPECTS(params != nullptr, "params output pointer must not be null");

    const auto defaults = cuvs::neighbors::cagra::graph_build_params::ace_params();

    auto* created = new cuvsAceParams{.npartitions     = defaults.npartitions,
                                      .ef_construction = defaults.ef_construction,
                                      .build_dir       = nullptr,
                                      .use_disk        = defaults.use_disk};

    // Allocate and copy the build directory string
    created->build_dir = strdup(defaults.build_dir.c_str());
    if (created->build_dir == nullptr) {
      delete created;
      RAFT_EXPECTS(false, "Failed to allocate the ACE build directory string");
    }

    *params = created;
  });
}

/**
 * C API entry point: releases a cuvsAceParams object together with its `build_dir` string.
 *
 * A null `params` is ignored. `build_dir` is released with free(), so it must have been
 * allocated with malloc()/strdup().
 */
extern "C" cuvsError_t cuvsAceParamsDestroy(cuvsAceParams_t params)
{
  return cuvs::core::translate_exceptions([=] {
    if (params == nullptr) { return; }

    // Release the owned build directory string before the struct itself.
    if (params->build_dir != nullptr) {
      auto* owned_dir = const_cast<char*>(params->build_dir);
      free(owned_dir);
    }
    delete params;
  });
}

extern "C" cuvsError_t cuvsCagraExtendParamsCreate(cuvsCagraExtendParams_t* params)
{
return cuvs::core::translate_exceptions(
Expand Down
Loading
Loading