Skip to content

Commit 6b65b70

Browse files
Merge branch 'branch-25.06' into merge-api-java-support
2 parents 3d81107 + 2972a82 commit 6b65b70

4 files changed

Lines changed: 251 additions & 89 deletions

File tree

cpp/src/neighbors/detail/cagra/cagra_build.cuh

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
#include <cstdio>
4545
#include <vector>
4646

47+
#include <sys/mman.h>
48+
4749
namespace cuvs::neighbors::cagra::detail {
4850

4951
template <typename IdxT>
@@ -410,6 +412,52 @@ void optimize(
410412
res, knn_graph_internal, new_graph_internal, guarantee_connectivity);
411413
}
412414

415+
// Owning wrapper around an anonymous, private memory mapping that is advised
// to be backed by Transparent HugePages (madvise with MADV_HUGEPAGE).
//
// The mapping is created in the constructor and released with munmap() in the
// destructor. The type is move-only: copying is deleted so that exactly one
// object is responsible for unmapping a given region.
struct mmap_owner {
  // Map `size` bytes of readable/writable memory that is not backed by a file
  // and request huge pages for it.
  //
  // Throws std::runtime_error if either mmap() or madvise() reports failure;
  // in the madvise() case the fresh mapping is unmapped before throwing, so
  // nothing leaks.
  //
  // `explicit` prevents a plain integer from silently converting into an
  // owning mapping.
  explicit mmap_owner(size_t size) : ptr_{nullptr}, size_{size}
  {
    // Keep the result in a local until every step succeeded, so the member
    // never holds MAP_FAILED or a pointer that has already been unmapped.
    void* mapped = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    if (mapped == MAP_FAILED) { throw std::runtime_error("cuvs::mmap_owner error: mmap failed"); }
    if (madvise(mapped, size, MADV_HUGEPAGE) != 0) {
      munmap(mapped, size);
      throw std::runtime_error("cuvs::mmap_owner error: madvise(MADV_HUGEPAGE) failed");
    }
    ptr_ = mapped;
  }

  // Unmap the region, unless this object was moved from (ptr_ == nullptr).
  ~mmap_owner() noexcept
  {
    if (ptr_ != nullptr) { munmap(ptr_, size_); }
  }

  // No copies for owning struct
  mmap_owner(const mmap_owner& res)                      = delete;
  auto operator=(const mmap_owner& other) -> mmap_owner& = delete;

  // Moving transfers ownership and leaves `other` empty (nullptr, size 0).
  mmap_owner(mmap_owner&& other) noexcept
    : ptr_{std::exchange(other.ptr_, nullptr)}, size_{std::exchange(other.size_, 0)}
  {
  }

  // Move assignment swaps the two objects' state: the mapping previously held
  // by *this ends up in `other` and is released when `other` is destroyed.
  auto operator=(mmap_owner&& other) noexcept -> mmap_owner&
  {
    std::swap(this->ptr_, other.ptr_);
    std::swap(this->size_, other.size_);
    return *this;
  }

  // Start of the mapped region, or nullptr for a moved-from object.
  [[nodiscard]] auto data() const noexcept -> void* { return ptr_; }
  // Length of the mapped region in bytes (0 for a moved-from object).
  [[nodiscard]] auto size() const noexcept -> size_t { return size_; }

 private:
  void* ptr_;
  size_t size_;
};
460+
413461
template <typename T,
414462
typename IdxT = uint32_t,
415463
typename Accessor = raft::host_device_accessor<std::experimental::default_accessor<T>,
@@ -493,6 +541,14 @@ auto iterative_build_graph(
493541
}
494542
}
495543

544+
// Allocate memory for neighbors list using Transparent HugePage
545+
constexpr size_t thp_size = 2 * 1024 * 1024;
546+
size_t byte_size = sizeof(IdxT) * final_graph_size * topk;
547+
if (byte_size % thp_size) { byte_size += thp_size - (byte_size % thp_size); }
548+
mmap_owner neighbors_list(byte_size);
549+
IdxT* neighbors_ptr = (IdxT*)neighbors_list.data();
550+
memset(neighbors_ptr, 0, byte_size);
551+
496552
auto curr_graph_size = initial_graph_size;
497553
while (true) {
498554
RAFT_LOG_DEBUG("# graph_size = %lu (%.3lf)",
@@ -524,7 +580,9 @@ auto iterative_build_graph(
524580

525581
auto dev_query_view = raft::make_device_matrix_view<const T, int64_t>(
526582
dev_dataset.data_handle(), (int64_t)curr_query_size, dev_dataset.extent(1));
527-
auto neighbors = raft::make_host_matrix<IdxT, int64_t>(curr_query_size, curr_topk);
583+
584+
auto neighbors_view =
585+
raft::make_host_matrix_view<IdxT, int64_t>(neighbors_ptr, curr_query_size, curr_topk);
528586

529587
// Search.
530588
// Since there are many queries, divide them into batches and search them.
@@ -551,7 +609,7 @@ auto iterative_build_graph(
551609
batch_dev_distances_view);
552610

553611
auto batch_neighbors_view = raft::make_host_matrix_view<IdxT, int64_t>(
554-
neighbors.data_handle() + batch.offset() * curr_topk, batch.size(), curr_topk);
612+
neighbors_view.data_handle() + batch.offset() * curr_topk, batch.size(), curr_topk);
555613
raft::copy(batch_neighbors_view.data_handle(),
556614
batch_dev_neighbors_view.data_handle(),
557615
batch_neighbors_view.size(),
@@ -564,7 +622,7 @@ auto iterative_build_graph(
564622
cagra_graph = raft::make_host_matrix<IdxT, int64_t>(0, 0); // delete existing graph
565623
cagra_graph = raft::make_host_matrix<IdxT, int64_t>(curr_graph_size, curr_graph_degree);
566624
optimize<IdxT>(
567-
res, neighbors.view(), cagra_graph.view(), flag_last ? params.guarantee_connectivity : 0);
625+
res, neighbors_view, cagra_graph.view(), flag_last ? params.guarantee_connectivity : 0);
568626
if (flag_last) { break; }
569627
}
570628

cpp/src/neighbors/detail/cagra/compute_distance_vpq.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,11 @@ struct vpq_descriptor_spec : public instance_spec<DataT, IndexT, DistanceT> {
8080
if (dataset.pq_bits() != PqBits) { return -1.0; }
8181
if (dataset.pq_len() != PqLen) { return -1.0; }
8282
// Otherwise, favor the closest dataset dimensionality.
83-
return 1.0 / (0.1 + std::abs(double(dataset.dim()) - double(DatasetBlockDim)));
83+
constexpr std::uint32_t preferred_load_elmes_per_thread =
84+
16; /*magic number that is good based on experiments.*/
85+
return 1.0 / (0.1 + std::abs(double(dataset.dim()) - double(DatasetBlockDim))) * TeamSize +
86+
1.0 / (0.1 + std::abs(double(dataset.dim()) / TeamSize / PqLen -
87+
preferred_load_elmes_per_thread));
8488
}
8589

8690
private:

0 commit comments

Comments
 (0)