Skip to content

Commit 4fbcc7f

Browse files
authored
Move faiss_select from raft to cuvs (#1658)
Move `detail/neighbors/faiss_select` from raft to cuvs. We will remove these files from raft after this PR. Authors: - Anupam (https://github.com/aamijar) Approvers: - Tarang Jain (https://github.com/tarang-jain) URL: #1658
1 parent 5ae2863 commit 4fbcc7f

12 files changed

Lines changed: 1800 additions & 13 deletions

cpp/src/neighbors/ball_cover/registers.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
#include "registers_types.cuh" // DistFunc
1212
#include <cuvs/neighbors/ball_cover.hpp>
1313

14+
#include "../detail/faiss_select/key_value_block_select.cuh"
1415
#include <raft/core/resource/cuda_stream.hpp>
1516
#include <raft/core/resource/thrust_policy.hpp>
1617
#include <raft/linalg/unary_op.cuh>
17-
#include <raft/neighbors/detail/faiss_select/key_value_block_select.cuh>
1818
#include <raft/util/cuda_utils.cuh>
1919

2020
#include <thrust/count.h>
@@ -166,7 +166,7 @@ RAFT_KERNEL compute_final_dists_registers(const value_t* X_reordered,
166166
local_x_ptr[j] = x_ptr[j];
167167
}
168168

169-
using namespace raft::neighbors::detail::faiss_select;
169+
using namespace cuvs::neighbors::detail::faiss_select;
170170
KeyValueBlockSelect<value_t, value_idx, false, Comparator<value_t>, warp_q, thread_q, tpb> heap(
171171
std::numeric_limits<value_t>::max(),
172172
std::numeric_limits<value_t>::max(),
@@ -326,7 +326,7 @@ RAFT_KERNEL block_rbc_kernel_registers(const value_t* X_reordered,
326326
}
327327

328328
// Each warp works on 1 R
329-
using namespace raft::neighbors::detail::faiss_select;
329+
using namespace cuvs::neighbors::detail::faiss_select;
330330
KeyValueBlockSelect<value_t, value_idx, false, Comparator<value_t>, warp_q, thread_q, tpb> heap(
331331
std::numeric_limits<value_t>::max(),
332332
std::numeric_limits<value_t>::max(),
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/**
2+
* SPDX-FileCopyrightText: Copyright (c) Facebook, Inc. and its affiliates.
3+
* SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
/*
7+
* Copyright (c) Facebook, Inc. and its affiliates.
8+
*
9+
* This source code is licensed under the MIT license found in the
10+
* LICENSE file thirdparty/LICENSES/LICENSE.faiss
11+
*/
12+
13+
#pragma once
14+
15+
#include <cuda.h>
16+
#include <cuda_fp16.h>
17+
18+
namespace cuvs::neighbors::detail::faiss_select {
19+
20+
template <typename T>
21+
struct Comparator {
22+
__device__ static inline bool lt(T a, T b) { return a < b; }
23+
24+
__device__ static inline bool gt(T a, T b) { return a > b; }
25+
};
26+
27+
template <>
28+
struct Comparator<half> {
29+
__device__ static inline bool lt(half a, half b) { return __hlt(a, b); }
30+
31+
__device__ static inline bool gt(half a, half b) { return __hgt(a, b); }
32+
};
33+
34+
} // namespace cuvs::neighbors::detail::faiss_select
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/**
2+
* SPDX-FileCopyrightText: Copyright (c) Facebook, Inc. and its affiliates.
3+
* SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
/*
7+
* Copyright (c) Facebook, Inc. and its affiliates.
8+
*
9+
* This source code is licensed under the MIT license found in the
10+
* LICENSE file thirdparty/LICENSES/LICENSE.faiss
11+
*/
12+
13+
#pragma once
14+
15+
namespace cuvs::neighbors::detail::faiss_select {
16+
// If the inner size (dim) of the vectors is small, we want a larger query tile
17+
// size, like 1024
18+
inline void chooseTileSize(size_t numQueries,
19+
size_t numCentroids,
20+
size_t dim,
21+
size_t elementSize,
22+
size_t totalMem,
23+
size_t& tileRows,
24+
size_t& tileCols)
25+
{
26+
// The matrix multiplication should be large enough to be efficient, but if
27+
// it is too large, we seem to lose efficiency as opposed to
28+
// double-streaming. Each tile size here defines 1/2 of the memory use due
29+
// to double streaming. We ignore available temporary memory, as that is
30+
// adjusted independently by the user and can thus meet these requirements
31+
// (or not). For <= 4 GB GPUs, prefer 512 MB of usage. For <= 8 GB GPUs,
32+
// prefer 768 MB of usage. Otherwise, prefer 1 GB of usage.
33+
size_t targetUsage = 0;
34+
35+
if (totalMem <= ((size_t)4) * 1024 * 1024 * 1024) {
36+
targetUsage = 512 * 1024 * 1024;
37+
} else if (totalMem <= ((size_t)8) * 1024 * 1024 * 1024) {
38+
targetUsage = 768 * 1024 * 1024;
39+
} else {
40+
targetUsage = 1024 * 1024 * 1024;
41+
}
42+
43+
targetUsage /= 2 * elementSize;
44+
45+
// 512 seems to be a batch size sweetspot for float32.
46+
// If we are on float16, increase to 512.
47+
// If the k size (vec dim) of the matrix multiplication is small (<= 32),
48+
// increase to 1024.
49+
size_t preferredTileRows = 512;
50+
if (dim <= 32) { preferredTileRows = 1024; }
51+
52+
tileRows = std::min(preferredTileRows, numQueries);
53+
54+
// tileCols is the remainder size
55+
tileCols = std::min(targetUsage / preferredTileRows, numCentroids);
56+
}
57+
} // namespace cuvs::neighbors::detail::faiss_select

0 commit comments

Comments
 (0)