Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 118 additions & 5 deletions faiss/IndexHNSW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
#include <cstdlib>
#include <cstring>

#include <limits>
#include <memory>
#include <queue>
#include <random>
#include <unordered_set>

#include <sys/stat.h>
Expand Down Expand Up @@ -144,7 +147,9 @@ void hnsw_add_vertices(

int i1 = n;

for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
for (int pt_level = hist.size() - 1;
pt_level >= !index_hnsw.init_level0;
pt_level--) {
int i0 = i1 - hist[pt_level];

if (verbose) {
Expand Down Expand Up @@ -180,7 +185,13 @@ void hnsw_add_vertices(
continue;
}

hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt);
hnsw.add_with_locks(
*dis,
pt_level,
pt_id,
locks,
vt,
index_hnsw.keep_max_size_level0 && (pt_level == 0));

if (prev_display >= 0 && i - i0 > prev_display + 10000) {
prev_display = i - i0;
Expand All @@ -200,7 +211,11 @@ void hnsw_add_vertices(
}
i1 = i0;
}
FAISS_ASSERT(i1 == 0);
if (index_hnsw.init_level0) {
FAISS_ASSERT(i1 == 0);
} else {
FAISS_ASSERT((i1 - hist[0]) == 0);
}
}
if (verbose) {
printf("Done in %.3f ms\n", getmillisecs() - t0);
Expand Down Expand Up @@ -404,10 +419,18 @@ void IndexHNSW::search_level_0(
float* distances,
idx_t* labels,
int nprobe,
int search_type) const {
int search_type,
const SearchParameters* params_in) const {
FAISS_THROW_IF_NOT(k > 0);
FAISS_THROW_IF_NOT(nprobe > 0);

const SearchParametersHNSW* params = nullptr;

if (params_in) {
params = dynamic_cast<const SearchParametersHNSW*>(params_in);
FAISS_THROW_IF_NOT_MSG(params, "params type invalid");
}

storage_idx_t ntotal = hnsw.levels.size();

using RH = HeapBlockResultHandler<HNSW::C>;
Expand All @@ -434,13 +457,21 @@ void IndexHNSW::search_level_0(
nearest_d + i * nprobe,
search_type,
search_stats,
vt);
vt,
params);
res.end();
vt.advance();
}
#pragma omp critical
{ hnsw_stats.combine(search_stats); }
}
if (is_similarity_metric(this->metric_type)) {
// we need to revert the negated distances
#pragma omp parallel for
for (size_t i = 0; i < k * n; i++) {
distances[i] = -distances[i];
}
}
}

void IndexHNSW::init_level_0_from_knngraph(
Expand Down Expand Up @@ -863,4 +894,86 @@ void IndexHNSW2Level::flip_to_ivf() {
delete storage2l;
}

/**************************************************************
* IndexHNSWCagra implementation
**************************************************************/

IndexHNSWCagra::IndexHNSWCagra() {
is_trained = true;
}

IndexHNSWCagra::IndexHNSWCagra(int d, int M, MetricType metric)
: IndexHNSW(
(metric == METRIC_L2)
? static_cast<IndexFlat*>(new IndexFlatL2(d))
: static_cast<IndexFlat*>(new IndexFlatIP(d)),
M) {
FAISS_THROW_IF_NOT_MSG(
((metric == METRIC_L2) || (metric == METRIC_INNER_PRODUCT)),
"unsupported metric type for IndexHNSWCagra");
own_fields = true;
is_trained = true;
init_level0 = true;
keep_max_size_level0 = true;
}

void IndexHNSWCagra::add(idx_t n, const float* x) {
FAISS_THROW_IF_NOT_MSG(
!base_level_only,
"Cannot add vectors when base_level_only is set to True");

IndexHNSW::add(n, x);
}

void IndexHNSWCagra::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
const SearchParameters* params) const {
if (!base_level_only) {
IndexHNSW::search(n, x, k, distances, labels, params);
} else {
std::vector<storage_idx_t> nearest(n);
std::vector<float> nearest_d(n);

#pragma omp for
for (idx_t i = 0; i < n; i++) {
std::unique_ptr<DistanceComputer> dis(
storage_distance_computer(this->storage));
dis->set_query(x + i * d);
nearest[i] = -1;
nearest_d[i] = std::numeric_limits<float>::max();

std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<idx_t> distrib(0, this->ntotal);

for (idx_t j = 0; j < num_base_level_search_entrypoints; j++) {
auto idx = distrib(gen);
auto distance = (*dis)(idx);
if (distance < nearest_d[i]) {
nearest[i] = idx;
nearest_d[i] = distance;
}
}
FAISS_THROW_IF_NOT_MSG(
nearest[i] >= 0, "Could not find a valid entrypoint.");
}

search_level_0(
n,
x,
k,
nearest.data(),
nearest_d.data(),
distances,
labels,
1, // n_probes
1, // search_type
params);
}
}

} // namespace faiss
44 changes: 43 additions & 1 deletion faiss/IndexHNSW.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ struct IndexHNSW : Index {
bool own_fields = false;
Index* storage = nullptr;

// When set to false, level 0 in the knn graph is not initialized.
// This option is used by GpuIndexCagra::copyTo(IndexHNSWCagra*)
// as level 0 knn graph is copied over from the index built by
// GpuIndexCagra.
bool init_level0 = true;

// When set to true, all neighbors in level 0 are filled up
// to the maximum size allowed (2 * M). This option is used by
// IndexHHNSWCagra to create a full base layer graph that is
// used when GpuIndexCagra::copyFrom(IndexHNSWCagra*) is invoked.
bool keep_max_size_level0 = false;

explicit IndexHNSW(int d = 0, int M = 32, MetricType metric = METRIC_L2);
explicit IndexHNSW(Index* storage, int M = 32);

Expand Down Expand Up @@ -81,7 +93,8 @@ struct IndexHNSW : Index {
float* distances,
idx_t* labels,
int nprobe = 1,
int search_type = 1) const;
int search_type = 1,
const SearchParameters* params = nullptr) const;

/// alternative graph building
void init_level_0_from_knngraph(int k, const float* D, const idx_t* I);
Expand Down Expand Up @@ -148,4 +161,33 @@ struct IndexHNSW2Level : IndexHNSW {
const SearchParameters* params = nullptr) const override;
};

struct IndexHNSWCagra : IndexHNSW {
IndexHNSWCagra();
IndexHNSWCagra(int d, int M, MetricType metric = METRIC_L2);

/// When set to true, the index is immutable.
/// This option is used to copy the knn graph from GpuIndexCagra
/// to the base level of IndexHNSWCagra without adding upper levels.
/// Doing so enables to search the HNSW index, but removes the
/// ability to add vectors.
bool base_level_only = false;

/// When `base_level_only` is set to `True`, the search function
/// searches only the base level knn graph of the HNSW index.
/// This parameter selects the entry point by randomly selecting
/// some points and using the best one.
int num_base_level_search_entrypoints = 32;

void add(idx_t n, const float* x) override;

/// entry point for search
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
const SearchParameters* params = nullptr) const override;
};

} // namespace faiss
8 changes: 6 additions & 2 deletions faiss/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,15 @@ generate_ivf_interleaved_code()

if(FAISS_ENABLE_RAFT)
list(APPEND FAISS_GPU_HEADERS
GpuIndexCagra.h
impl/RaftCagra.cuh
impl/RaftFlatIndex.cuh
impl/RaftIVFFlat.cuh
impl/RaftIVFPQ.cuh
utils/RaftUtils.h)
list(APPEND FAISS_GPU_SRC
GpuIndexCagra.cu
impl/RaftCagra.cu
impl/RaftFlatIndex.cu
impl/RaftIVFFlat.cu
impl/RaftIVFPQ.cu
Expand Down Expand Up @@ -316,5 +320,5 @@ __nv_relfatbin : { *(__nv_relfatbin) }
target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")

find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
target_compile_options(faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass> $<$<BOOL:${FAISS_ENABLE_RAFT}>:OpenMP::OpenMP_CXX>)
target_compile_options(faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr $<$<BOOL:${FAISS_ENABLE_RAFT}>:-Xcompiler=${OpenMP_CXX_FLAGS}>>)
29 changes: 27 additions & 2 deletions faiss/gpu/GpuCloner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

#include <faiss/IndexBinaryFlat.h>
#include <faiss/IndexFlat.h>
#if defined USE_NVIDIA_RAFT
#include <faiss/IndexHNSW.h>
#endif
#include <faiss/IndexIVF.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
Expand All @@ -24,6 +27,9 @@
#include <faiss/MetaIndexes.h>
#include <faiss/gpu/GpuIndex.h>
#include <faiss/gpu/GpuIndexBinaryFlat.h>
#if defined USE_NVIDIA_RAFT
#include <faiss/gpu/GpuIndexCagra.h>
#endif
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
Expand Down Expand Up @@ -85,7 +91,15 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
// objective is to make a single component out of them
// (inverse op of ToGpuClonerMultiple)

} else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
}
#if defined USE_NVIDIA_RAFT
else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
IndexHNSWCagra* res = new IndexHNSWCagra();
icg->copyTo(res);
return res;
}
#endif
else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
int nshard = ish->count();
FAISS_ASSERT(nshard > 0);
Index* res = clone_Index(ish->at(0));
Expand Down Expand Up @@ -215,7 +229,18 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
}

return res;
} else {
}
#if defined USE_NVIDIA_RAFT
else if (auto icg = dynamic_cast<const faiss::IndexHNSWCagra*>(index)) {
GpuIndexCagraConfig config;
config.device = device;
GpuIndexCagra* res =
new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
res->copyFrom(icg);
return res;
}
#endif
else {
// use CPU cloner for IDMap and PreTransform
auto index_idmap = dynamic_cast<const IndexIDMap*>(index);
auto index_pt = dynamic_cast<const IndexPreTransform*>(index);
Expand Down
Loading