4444#include < cstdio>
4545#include < vector>
4646
47+ #include < sys/mman.h>
48+
4749namespace cuvs ::neighbors::cagra::detail {
4850
4951template <typename IdxT>
@@ -410,6 +412,52 @@ void optimize(
410412 res, knn_graph_internal, new_graph_internal, guarantee_connectivity);
411413}
412414
// RAII wrapper for allocating memory with Transparent HugePage.
//
// The constructor creates an anonymous, private, read/write mapping of `size` bytes with mmap()
// and advises the kernel with madvise(MADV_HUGEPAGE) over the whole range. The destructor
// unmaps the region. The type is move-only: exactly one object owns a given mapping.
struct mmap_owner {
  // Allocate a new memory region (not backed by a file).
  //
  // @param size  number of bytes to map; passed unchanged to mmap(), madvise() and munmap().
  // @throws std::runtime_error if mmap() fails, or if madvise(MADV_HUGEPAGE) fails (in which
  //         case the fresh mapping is released before throwing).
  mmap_owner(size_t size) : ptr_{nullptr}, size_{size}
  {
    const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
    ptr_            = mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
    if (ptr_ == MAP_FAILED) {
      // Never leave MAP_FAILED stored in ptr_: the rest of the class treats nullptr as "empty".
      ptr_ = nullptr;
      throw std::runtime_error("cuvs::mmap_owner error: mmap failed");
    }
    if (madvise(ptr_, size, MADV_HUGEPAGE) != 0) {
      // The destructor does not run when a constructor throws, so unmap here to avoid a leak.
      munmap(ptr_, size);
      ptr_ = nullptr;
      throw std::runtime_error("cuvs::mmap_owner error: madvise(MADV_HUGEPAGE) failed");
    }
  }

  // Release the mapping, if this object still owns one (moved-from objects hold nullptr).
  ~mmap_owner() noexcept
  {
    if (ptr_ != nullptr) { munmap(ptr_, size_); }
  }

  // No copies for owning struct
  mmap_owner(const mmap_owner& res)                        = delete;
  auto operator=(const mmap_owner& other) -> mmap_owner& = delete;

  // Moving is fine: the source is left empty (nullptr, 0). Only a pointer and an integer are
  // exchanged, so this cannot throw and is declared noexcept.
  mmap_owner(mmap_owner&& other) noexcept
    : ptr_{std::exchange(other.ptr_, nullptr)}, size_{std::exchange(other.size_, 0)}
  {
  }

  // Move assignment swaps the two mappings; the mapping previously held by *this is released
  // when `other` is destroyed. Self-assignment is a no-op.
  auto operator=(mmap_owner&& other) noexcept -> mmap_owner&
  {
    std::swap(this->ptr_, other.ptr_);
    std::swap(this->size_, other.size_);
    return *this;
  }

  // Start address of the mapping, or nullptr if this object has been moved from.
  [[nodiscard]] auto data() const -> void* { return ptr_; }
  // Size of the mapping in bytes as given at construction, or 0 if moved from.
  [[nodiscard]] auto size() const -> size_t { return size_; }

 private:
  void* ptr_{nullptr};  // owned mapping; nullptr means "owns nothing"
  size_t size_{0};      // length in bytes passed to mmap()/munmap()
};

460+
413461template <typename T,
414462 typename IdxT = uint32_t ,
415463 typename Accessor = raft::host_device_accessor<std::experimental::default_accessor<T>,
@@ -493,6 +541,14 @@ auto iterative_build_graph(
493541 }
494542 }
495543
544+ // Allocate memory for neighbors list using Transparent HugePage
545+ constexpr size_t thp_size = 2 * 1024 * 1024 ;
546+ size_t byte_size = sizeof (IdxT) * final_graph_size * topk;
547+ if (byte_size % thp_size) { byte_size += thp_size - (byte_size % thp_size); }
548+ mmap_owner neighbors_list (byte_size);
549+ IdxT* neighbors_ptr = (IdxT*)neighbors_list.data ();
550+ memset (neighbors_ptr, 0 , byte_size);
551+
496552 auto curr_graph_size = initial_graph_size;
497553 while (true ) {
498554 RAFT_LOG_DEBUG (" # graph_size = %lu (%.3lf)" ,
@@ -524,7 +580,9 @@ auto iterative_build_graph(
524580
525581 auto dev_query_view = raft::make_device_matrix_view<const T, int64_t >(
526582 dev_dataset.data_handle (), (int64_t )curr_query_size, dev_dataset.extent (1 ));
527- auto neighbors = raft::make_host_matrix<IdxT, int64_t >(curr_query_size, curr_topk);
583+
584+ auto neighbors_view =
585+ raft::make_host_matrix_view<IdxT, int64_t >(neighbors_ptr, curr_query_size, curr_topk);
528586
529587 // Search.
530588 // Since there are many queries, divide them into batches and search them.
@@ -551,7 +609,7 @@ auto iterative_build_graph(
551609 batch_dev_distances_view);
552610
553611 auto batch_neighbors_view = raft::make_host_matrix_view<IdxT, int64_t >(
554- neighbors .data_handle () + batch.offset () * curr_topk, batch.size (), curr_topk);
612+ neighbors_view .data_handle () + batch.offset () * curr_topk, batch.size (), curr_topk);
555613 raft::copy (batch_neighbors_view.data_handle (),
556614 batch_dev_neighbors_view.data_handle (),
557615 batch_neighbors_view.size (),
@@ -564,7 +622,7 @@ auto iterative_build_graph(
564622 cagra_graph = raft::make_host_matrix<IdxT, int64_t >(0 , 0 ); // delete existing graph
565623 cagra_graph = raft::make_host_matrix<IdxT, int64_t >(curr_graph_size, curr_graph_degree);
566624 optimize<IdxT>(
567- res, neighbors. view () , cagra_graph.view (), flag_last ? params.guarantee_connectivity : 0 );
625+ res, neighbors_view , cagra_graph.view (), flag_last ? params.guarantee_connectivity : 0 );
568626 if (flag_last) { break ; }
569627 }
570628
0 commit comments