Skip to content

Commit 17c3992

Browse files
authored
MB-59575: Reducing copy overhead of already memory mapped content (#17)
- Currently, the parent layer (zapx) passes a pointer to the serialized byte array in the index file, which is mmap'd on the Go side. - However, while reading the index from the buffer we read (and memcpy) the entire set of vector codes in the index, which can hamper performance and RSS usage. - This PR avoids these memcpy's by keeping a pointer to the mmap'd byte array when feasible and retrieving the specific vector codes lazily in the read-only paths.
1 parent 38f6b60 commit 17c3992

File tree

9 files changed

+151
-9
lines changed

9 files changed

+151
-9
lines changed

c_api/index_io_c_ex.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@
2121
extern "C" {
2222
#endif
2323

24+
// skip prefetch phase while searching over the inverted lists
25+
#define FAISS_IO_FLAG_SKIP_PREFETCH 32
26+
// the following two macros together decide whether to read the index from an
27+
// already mmap'd data buffer. Together they are the C equivalent of IO_FLAG_READ_MMAP from index_io.h.
28+
// usage is - FAISS_IO_FLAG_READ_MMAP | FAISS_IO_FLAG_ONDISK_IVF
29+
#define FAISS_IO_FLAG_READ_MMAP 64
30+
#define FAISS_IO_FLAG_ONDISK_IVF 0x646f0000
31+
2432
/** Write index to buffer
2533
*/
2634
int faiss_write_index_buf(const FaissIndex* idx, size_t* buf_size, unsigned char** buf);

faiss/IndexFlat.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ FlatCodesDistanceComputer* IndexFlat::get_FlatCodesDistanceComputer() const {
169169
}
170170

171171
void IndexFlat::reconstruct(idx_t key, float* recons) const {
    // The stored codes live either in the externally provided region
    // (codes_ptr, when mmaped is set) or in the owned `codes` vector.
    // Select the backing buffer once, then copy the code_size bytes of
    // entry `key` into the caller's output.
    const uint8_t* base = mmaped ? codes_ptr : codes.data();
    memcpy(recons, base + key * code_size, code_size);
}
174178

faiss/IndexFlat.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,15 @@ struct IndexFlat : IndexFlatCodes {
5454

5555
// get pointer to the floating point data
5656
float* get_xb() {
57+
if (mmaped) {
58+
return (float*)(codes_ptr);
59+
}
5760
return (float*)codes.data();
5861
}
5962
const float* get_xb() const {
63+
if (mmaped) {
64+
return (const float*)(codes_ptr);
65+
}
6066
return (const float*)codes.data();
6167
}
6268

faiss/IndexFlatCodes.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,20 @@ namespace faiss {
1818
// Builds an index with the given code size, dimension and metric.
// The mmap-related members must be initialized here as well: reconstruct(),
// get_xb() and the destructor all branch on `mmaped`, so leaving it
// indeterminate would make those paths read an uninitialized bool.
// Initializers follow the member declaration order.
IndexFlatCodes::IndexFlatCodes(size_t code_size, idx_t d, MetricType metric)
        : Index(d, metric),
          code_size(code_size),
          codes_ptr(nullptr),
          mmaped(false),
          mmaped_size(0) {}
2020

21-
IndexFlatCodes::IndexFlatCodes() : code_size(0) {}
21+
// Default state: no codes, nothing mapped.
// Initializers are listed in declaration order (codes_ptr, mmaped,
// mmaped_size); members are always constructed in declaration order, and a
// mismatching list triggers -Wreorder.
IndexFlatCodes::IndexFlatCodes()
        : code_size(0),
          codes_ptr(nullptr),
          mmaped(false),
          mmaped_size(0) {}
26+
27+
IndexFlatCodes::~IndexFlatCodes() {
    // Nothing to do unless codes_ptr refers to an externally mapped region.
    if (!mmaped) {
        return;
    }
    // The region is never freed or unmapped here; we only drop our
    // reference so that the calling application layer remains free to
    // release / GC the mapping on its side.
    codes_ptr = nullptr;
}
2235

2336
void IndexFlatCodes::add(idx_t n, const float* x) {
2437
FAISS_THROW_IF_NOT(is_trained);

faiss/IndexFlatCodes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,14 @@ struct IndexFlatCodes : Index {
2424

2525
/// encoded dataset, size ntotal * code_size
2626
std::vector<uint8_t> codes;
27+
uint8_t* codes_ptr;
28+
bool mmaped; // true if codes_ptr is pointing to a mmaped region
29+
size_t mmaped_size;
2730

2831
IndexFlatCodes();
2932

33+
~IndexFlatCodes() override;
34+
3035
IndexFlatCodes(size_t code_size, idx_t d, MetricType metric = METRIC_L2);
3136

3237
/// default add uses sa_encode

faiss/impl/index_read.cpp

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,12 +188,15 @@ static void read_ArrayInvertedLists_sizes(
188188
InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
189189
uint32_t h;
190190
READ1(h);
191+
bool load_mem = !((io_flags & IO_FLAG_READ_MMAP) ||
192+
(io_flags & IO_FLAG_SKIP_IVF_DATA));
193+
191194
if (h == fourcc("il00")) {
192195
fprintf(stderr,
193196
"read_InvertedLists:"
194197
" WARN! inverted lists not stored with IVF object\n");
195198
return nullptr;
196-
} else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
199+
} else if (h == fourcc("ilar") && load_mem) {
197200
size_t nlist, code_size;
198201
READ1(nlist);
199202
READ1(code_size);
@@ -212,7 +215,7 @@ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
212215
}
213216
return ails;
214217

215-
} else if (h == fourcc("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
218+
} else if (h == fourcc("ilar") && !load_mem) {
216219
// code is always ilxx where xx is specific to the type of invlists we
217220
// want so we get the 16 high bits from the io_flag and the 16 low bits
218221
// as "il"
@@ -520,6 +523,40 @@ static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
520523

521524
int read_old_fmt_hack = 0;
522525

526+
/**
527+
* flat indexes which store the codes directly, can use this API instead to have a
528+
* pointer to the mmaped region to avoid allocation costs. works specifically with
529+
* BufIOReader as of now.
530+
**/
531+
void read_codes_mmaped(IOReader* f, IndexFlat* idxf) {
532+
idxf->mmaped = true;
533+
534+
// read the size of codes data
535+
size_t size;
536+
READANDCHECK(&size, 1);
537+
FAISS_THROW_IF_NOT(size >= 0 && size < (uint64_t{1} << 40));
538+
size *= 4;
539+
540+
// size == ntotal * code_size == ntotal * d * sizeof(float) for IndexFlat
541+
// NOTE: the code_size value can change for indexes with encodings like
542+
// SQ, PQ although the size value will still be equal to ntotal * code_size
543+
// bytes which is accessible via codes_ptr.
544+
FAISS_THROW_IF_NOT(size == idxf->ntotal * idxf->code_size);
545+
idxf->mmaped_size = size;
546+
547+
// BufIOReader is the reader which has a direct pointer to the mmaped
548+
// byte array, so we can directly set the codes_ptr to the mmaped region
549+
BufIOReader* reader = dynamic_cast<BufIOReader*>(f);
550+
FAISS_THROW_IF_NOT_MSG(reader, "reading over mmap'd region is supported only with BufIOReader");
551+
FAISS_THROW_IF_NOT_MSG(reader->buf, "reader buffer is null");
552+
553+
idxf->codes_ptr = const_cast<uint8_t*>(reader->buf);
554+
// seek to the point where the codes section begins
555+
idxf->codes_ptr += reader->rp;
556+
// update read pointer appropriately
557+
reader->rp += size;
558+
}
559+
523560
Index* read_index(IOReader* f, int io_flags) {
524561
Index* idx = nullptr;
525562
uint32_t h;
@@ -535,9 +572,14 @@ Index* read_index(IOReader* f, int io_flags) {
535572
}
536573
read_index_header(idxf, f);
537574
idxf->code_size = idxf->d * sizeof(float);
538-
READXBVECTOR(idxf->codes);
539-
FAISS_THROW_IF_NOT(
575+
576+
if (io_flags & IO_FLAG_READ_MMAP) {
577+
read_codes_mmaped(f, idxf);
578+
} else {
579+
READXBVECTOR(idxf->codes);
580+
FAISS_THROW_IF_NOT(
540581
idxf->codes.size() == idxf->ntotal * idxf->code_size);
582+
}
541583
// leak!
542584
idx = idxf;
543585
} else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {

faiss/index_io.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ const int IO_FLAG_ONDISK_SAME_DIR = 4;
5252
const int IO_FLAG_SKIP_IVF_DATA = 8;
5353
// don't initialize precomputed table after loading
5454
const int IO_FLAG_SKIP_PRECOMPUTE_TABLE = 16;
55+
// skip prefetch phase while searching over the inverted lists
56+
const int IO_FLAG_SKIP_PREFETCH = 32;
57+
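// read the index from an already mmap'd data buffer (supported only with BufIOReader).
// NOTE: this value shares the bits 0x646f0000 with IO_FLAG_MMAP, so test for it with
// (io_flags & IO_FLAG_READ_MMAP) == IO_FLAG_READ_MMAP rather than a plain non-zero check.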
58+
const int IO_FLAG_READ_MMAP = 64 | 0x646f0000;
5559
// try to memmap data (useful to load an ArrayInvertedLists as an
5660
// OnDiskInvertedLists)
5761
const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;

faiss/invlists/OnDiskInvertedLists.cpp

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,12 @@ struct OnDiskInvertedLists::OngoingPrefetch {
257257
int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
258258

259259
void OnDiskInvertedLists::prefetch_lists(const idx_t* list_nos, int n) const {
260+
261+
// avoid prefetch when the ondisk-ivf is already prepared for read-only paths
262+
// helpful when the queries are not batched
263+
if (skip_prefetch) {
264+
return;
265+
}
260266
pf->prefetch_lists(list_nos, n);
261267
}
262268

@@ -353,6 +359,8 @@ OnDiskInvertedLists::OnDiskInvertedLists(
353359
filename(filename),
354360
totsize(0),
355361
ptr(nullptr),
362+
pre_mapped(false),
363+
skip_prefetch(false),
356364
read_only(false),
357365
locks(new LockLevels()),
358366
pf(new OngoingPrefetch(this)),
@@ -369,11 +377,16 @@ OnDiskInvertedLists::~OnDiskInvertedLists() {
369377

370378
// unmap all lists
371379
if (ptr != nullptr) {
372-
int err = munmap(ptr, totsize);
373-
if (err != 0) {
374-
fprintf(stderr, "mumap error: %s", strerror(errno));
380+
if (!pre_mapped) {
381+
int err = munmap(ptr, totsize);
382+
if (err != 0) {
383+
fprintf(stderr, "mumap error: %s", strerror(errno));
384+
}
385+
} else {
386+
ptr = nullptr;
375387
}
376388
}
389+
377390
delete locks;
378391
}
379392

@@ -741,10 +754,48 @@ InvertedLists* OnDiskInvertedListsIOHook::read(IOReader* f, int io_flags)
741754
return od;
742755
}
743756

757+
/**
758+
* This function is just an alternate way to use the OnDiskInvertedLists.
759+
* It's useful when the index is read using BufIOReader from a uint8_t* buffer
760+
* which is already mmap'd by the application layer.
761+
* All the responbility of handling this mmap pointer now falls on the app layer
762+
**/
763+
InvertedLists* read_ArrayInvertedLists_MMAP(
764+
IOReader* f,
765+
OnDiskInvertedLists* ails,
766+
const std::vector<size_t>& sizes) {
767+
768+
// setting this true is to ensure that the destructor does not unmap
769+
// since the mmap control is on the parent layer of faiss.
770+
ails->pre_mapped = true;
771+
772+
BufIOReader* reader = dynamic_cast<BufIOReader*>(f);
773+
FAISS_THROW_IF_NOT_MSG(reader, "reading over mmap'd region is supported only with BufIOReader");
774+
775+
size_t o = reader->rp;
776+
ails->totsize = reader->buf_size;
777+
FAISS_THROW_IF_NOT(o <= ails->totsize);
778+
FAISS_THROW_IF_NOT_MSG(reader->buf, "reader buffer is null");
779+
// using the base pointer to the mmap'd region
780+
ails->ptr = const_cast<uint8_t*>(reader->buf);
781+
782+
for (size_t i = 0; i < ails->nlist; i++) {
783+
OnDiskInvertedLists::List& l = ails->lists[i];
784+
l.size = l.capacity = sizes[i];
785+
l.offset = o;
786+
o += l.size * (sizeof(idx_t) + ails->code_size);
787+
}
788+
789+
// updating the read pointer appropriately, this is needed when the IVF
790+
// wrapped with another index class.
791+
reader->rp = o;
792+
return ails;
793+
}
794+
744795
/** read from a ArrayInvertedLists into this invertedlist type */
745796
InvertedLists* OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
746797
IOReader* f,
747-
int /* io_flags */,
798+
int io_flags,
748799
size_t nlist,
749800
size_t code_size,
750801
const std::vector<size_t>& sizes) const {
@@ -753,6 +804,13 @@ InvertedLists* OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
753804
ails->code_size = code_size;
754805
ails->read_only = true;
755806
ails->lists.resize(nlist);
807+
if (io_flags & IO_FLAG_SKIP_PREFETCH) {
808+
ails->skip_prefetch = true;
809+
}
810+
811+
if (io_flags & IO_FLAG_READ_MMAP) {
812+
return read_ArrayInvertedLists_MMAP(f, ails, sizes);
813+
}
756814

757815
FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
758816
FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");

faiss/invlists/OnDiskInvertedLists.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ struct OnDiskInvertedLists : InvertedLists {
7777
size_t totsize;
7878
uint8_t* ptr; // mmap base pointer
7979
bool read_only; /// are inverted lists mapped read-only
80+
bool pre_mapped;// whether the content is already mmap'd before class creation
81+
bool skip_prefetch; // whether to skip prefetching the lists while performing search
8082

8183
OnDiskInvertedLists(size_t nlist, size_t code_size, const char* filename);
8284

0 commit comments

Comments
 (0)