Skip to content
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions faiss/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ set(FAISS_SRC
impl/io.cpp
impl/kmeans1d.cpp
impl/lattice_Zn.cpp
impl/mapped_io.cpp
impl/pq4_fast_scan.cpp
impl/pq4_fast_scan_search_1.cpp
impl/pq4_fast_scan_search_qbs.cpp
impl/residual_quantizer_encode_steps.cpp
impl/io.cpp
impl/lattice_Zn.cpp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do these two files disappear?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

duplicates

impl/zerocopy_io.cpp
impl/NNDescent.cpp
invlists/BlockInvertedLists.cpp
invlists/DirectMap.cpp
Expand Down
2 changes: 1 addition & 1 deletion faiss/IndexFlatCodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ CodePacker* IndexFlatCodes::get_CodePacker() const {
}

void IndexFlatCodes::permute_entries(const idx_t* perm) {
std::vector<uint8_t> new_codes(codes.size());
MaybeOwnedVector<uint8_t> new_codes(codes.size());

for (idx_t i = 0; i < ntotal; i++) {
memcpy(new_codes.data() + i * code_size,
Expand Down
6 changes: 4 additions & 2 deletions faiss/IndexFlatCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

#pragma once

#include <vector>

#include <faiss/Index.h>
#include <faiss/impl/DistanceComputer.h>
#include <vector>
#include <faiss/impl/maybe_owned_vector.h>

namespace faiss {

Expand All @@ -21,7 +23,7 @@ struct IndexFlatCodes : Index {
size_t code_size;

/// encoded dataset, size ntotal * code_size
std::vector<uint8_t> codes;
MaybeOwnedVector<uint8_t> codes;

IndexFlatCodes();

Expand Down
2 changes: 1 addition & 1 deletion faiss/impl/HNSW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ void HNSW::permute_entries(const idx_t* map) {
// swap everyone
std::swap(levels, new_levels);
std::swap(offsets, new_offsets);
std::swap(neighbors, new_neighbors);
neighbors = std::move(new_neighbors);
}

/**************************************************************
Expand Down
3 changes: 2 additions & 1 deletion faiss/impl/HNSW.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include <faiss/Index.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/maybe_owned_vector.h>
#include <faiss/impl/platform_macros.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/random.h>
Expand Down Expand Up @@ -121,7 +122,7 @@ struct HNSW {

/// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i
/// for all levels. this is where all storage goes.
std::vector<storage_idx_t> neighbors;
MaybeOwnedVector<storage_idx_t> neighbors;

/// entry point in the search structure (one of the points with maximum
/// level
Expand Down
205 changes: 189 additions & 16 deletions faiss/impl/index_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,167 @@
#include <faiss/IndexBinaryHash.h>
#include <faiss/IndexBinaryIVF.h>

// mmap-ing and viewing facilities
#include <faiss/impl/maybe_owned_vector.h>

#include <faiss/impl/mapped_io.h>
#include <faiss/impl/zerocopy_io.h>

namespace faiss {

/*************************************************************
* Mmap-ing and viewing facilities
**************************************************************/

/// Fill `target` with `size` elements taken from `f`, where the element count
/// is supplied by the caller instead of being read from the stream.
///
/// If `f` is a ZeroCopyIOReader and VectorT is a MaybeOwnedVector, nothing is
/// copied: `target` is replaced by a view over the address handed back by
/// get_data_view(). In every other case `target` is resized to `size` and
/// filled through READANDCHECK.
///
/// @param target  vector to fill (or to turn into a view)
/// @param f       reader to consume from
/// @param size    number of elements requested
///
/// FAISS_THROW_IF_NOT_FMT fires when the reader reports an element count
/// different from `size`.
template <typename VectorT>
void read_vector_with_size(VectorT& target, IOReader* f, size_t size) {
    // the view path only exists for MaybeOwnedVector targets, so the reader
    // type is not even inspected for other vector types
    if constexpr (is_maybe_owned_vector_v<VectorT>) {
        ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f);
        if (zr != nullptr) {
            // ask the reader for a pointer instead of a copy
            char* address = nullptr;
            const size_t nread = zr->get_data_view(
                    reinterpret_cast<void**>(&address),
                    sizeof(typename VectorT::value_type),
                    size);

            // both counts are size_t, hence %zu
            FAISS_THROW_IF_NOT_FMT(
                    nread == size,
                    "read error in %s: %zu != %zu (%s)",
                    f->name.c_str(),
                    nread,
                    size,
                    strerror(errno));

            // no owner object is attached, matching the other zero-copy
            // call sites in this file, which pass nullptr explicitly
            target = VectorT::create_view(address, nread, nullptr);
            return;
        }
    }

    // copying path: allocate and read the payload into owned storage
    target.resize(size);
    READANDCHECK(target.data(), size);
}

template <typename VectorT>
void read_vector(VectorT& target, IOReader* f) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to enable this via an IO flag? The IVF memory-mapped files work with a regular FileIOReader.

https://github.com/facebookresearch/faiss/blob/main/faiss/invlists/OnDiskInvertedLists.cpp#L771

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> OnDiskInvertedLists is unsupported on Windows.

My code allows mmap functionality on Windows as well. Let me take a look.

// Reads a size-prefixed vector from f into target. Three strategies are
// tried in order: a view over a memory mapping, a view over a zero-copy
// buffer, and finally the copying READVECTOR macro. The two view strategies
// are compiled in only when VectorT is a MaybeOwnedVector.

// strategy 1: is it a mmap-enabled reader?
MappedFileIOReader* mf = dynamic_cast<MappedFileIOReader*>(f);
if (mf != nullptr) {
    if constexpr (is_maybe_owned_vector_v<VectorT>) {
        // the element count precedes the payload
        size_t size = 0;
        READANDCHECK(&size, 1);

        // map the payload and verify the element count
        char* address = nullptr;
        const size_t nread = mf->mmap(
                reinterpret_cast<void**>(&address),
                sizeof(typename VectorT::value_type),
                size);

        // both counts are size_t, hence %zu
        FAISS_THROW_IF_NOT_FMT(
                nread == size,
                "read error in %s: %zu != %zu (%s)",
                f->name.c_str(),
                nread,
                size,
                strerror(errno));

        // the reader's mmap_owner is handed to the view (presumably so the
        // mapping outlives the reader -- confirm in maybe_owned_vector.h)
        target = VectorT::create_view(address, nread, mf->mmap_owner);
        return;
    }
}

// strategy 2: is it a zero-copy reader?
ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f);
if (zr != nullptr) {
    if constexpr (is_maybe_owned_vector_v<VectorT>) {
        // the element count precedes the payload
        size_t size = 0;
        READANDCHECK(&size, 1);

        // obtain a pointer instead of a copy
        char* address = nullptr;
        const size_t nread = zr->get_data_view(
                reinterpret_cast<void**>(&address),
                sizeof(typename VectorT::value_type),
                size);

        // a short view must be rejected just like a short mmap above;
        // otherwise target would silently hold fewer elements than the
        // stream announced
        FAISS_THROW_IF_NOT_FMT(
                nread == size,
                "read error in %s: %zu != %zu (%s)",
                f->name.c_str(),
                nread,
                size,
                strerror(errno));

        // no owner object is attached to a zero-copy view
        target = VectorT::create_view(address, nread, nullptr);
        return;
    }
}

// strategy 3: the default, copying case
READVECTOR(target);
}

template <typename VectorT>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to factor this together with the previous function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a good suggestion, let me see...

void read_xb_vector(VectorT& target, IOReader* f) {
    // Reads a vector whose stored size prefix must be multiplied by 4 to get
    // the number of elements of target. Like read_vector, it prefers a view
    // over a memory mapping, then a view over a zero-copy buffer, and falls
    // back to the copying READXBVECTOR macro.

    // strategy 1: is it a mmap-enabled reader?
    MappedFileIOReader* mf = dynamic_cast<MappedFileIOReader*>(f);
    if (mf != nullptr) {
        // view path is compiled in only for MaybeOwnedVector targets
        if constexpr (is_maybe_owned_vector_v<VectorT>) {
            // read the stored size
            size_t size = 0;
            READANDCHECK(&size, 1);

            // a corrupt size field must not wrap around when scaled
            FAISS_THROW_IF_NOT(size <= static_cast<size_t>(-1) / 4);
            size *= 4;

            // map the payload and verify the element count
            char* address = nullptr;
            const size_t nread = mf->mmap(
                    reinterpret_cast<void**>(&address),
                    sizeof(typename VectorT::value_type),
                    size);

            // both counts are size_t, hence %zu
            FAISS_THROW_IF_NOT_FMT(
                    nread == size,
                    "read error in %s: %zu != %zu (%s)",
                    f->name.c_str(),
                    nread,
                    size,
                    strerror(errno));

            // the reader's mmap_owner is handed to the view (presumably so
            // the mapping outlives the reader -- confirm in
            // maybe_owned_vector.h)
            target = VectorT::create_view(address, nread, mf->mmap_owner);
            return;
        }
    }

    // strategy 2: is it a zero-copy reader?
    ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f);
    if (zr != nullptr) {
        // only byte-vector targets take this path; any other VectorT falls
        // through to READXBVECTOR below
        if constexpr (std::is_same_v<VectorT, MaybeOwnedVector<uint8_t>>) {
            // read the stored size
            size_t size = 0;
            READANDCHECK(&size, 1);

            // a corrupt size field must not wrap around when scaled
            FAISS_THROW_IF_NOT(size <= static_cast<size_t>(-1) / 4);
            size *= 4;

            // obtain a pointer instead of a copy
            char* address = nullptr;
            const size_t nread = zr->get_data_view(
                    reinterpret_cast<void**>(&address),
                    sizeof(typename VectorT::value_type),
                    size);

            // a short view must be rejected just like a short mmap above
            FAISS_THROW_IF_NOT_FMT(
                    nread == size,
                    "read error in %s: %zu != %zu (%s)",
                    f->name.c_str(),
                    nread,
                    size,
                    strerror(errno));

            // no owner object is attached to a zero-copy view
            target = VectorT::create_view(address, nread, nullptr);
            return;
        }
    }

    // strategy 3: the default, copying case
    READXBVECTOR(target);
}

/*************************************************************
* Read
**************************************************************/
Expand Down Expand Up @@ -275,7 +434,7 @@ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
aq->search_type == AdditiveQuantizer::ST_norm_cqint4 ||
aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) {
READXBVECTOR(aq->qnorm.codes);
read_xb_vector(aq->qnorm.codes, f);
aq->qnorm.ntotal = aq->qnorm.codes.size() / 4;
aq->qnorm.update_permutation();
}
Expand Down Expand Up @@ -365,7 +524,7 @@ static void read_HNSW(HNSW* hnsw, IOReader* f) {
READVECTOR(hnsw->cum_nneighbor_per_level);
READVECTOR(hnsw->levels);
READVECTOR(hnsw->offsets);
READVECTOR(hnsw->neighbors);
read_vector(hnsw->neighbors, f);

READ1(hnsw->entry_point);
READ1(hnsw->max_level);
Expand Down Expand Up @@ -545,7 +704,7 @@ Index* read_index(IOReader* f, int io_flags) {
}
read_index_header(idxf, f);
idxf->code_size = idxf->d * sizeof(float);
READXBVECTOR(idxf->codes);
read_xb_vector(idxf->codes, f);
FAISS_THROW_IF_NOT(
idxf->codes.size() == idxf->ntotal * idxf->code_size);
// leak!
Expand Down Expand Up @@ -576,7 +735,7 @@ Index* read_index(IOReader* f, int io_flags) {
idxl->rrot = *rrot;
delete rrot;
}
READVECTOR(idxl->codes);
read_vector(idxl->codes, f);
FAISS_THROW_IF_NOT(
idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits);
FAISS_THROW_IF_NOT(
Expand All @@ -589,7 +748,7 @@ Index* read_index(IOReader* f, int io_flags) {
read_index_header(idxp, f);
read_ProductQuantizer(&idxp->pq, f);
idxp->code_size = idxp->pq.code_size;
READVECTOR(idxp->codes);
read_vector(idxp->codes, f);
if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
READ1(idxp->search_type);
READ1(idxp->encode_signs);
Expand All @@ -611,28 +770,28 @@ Index* read_index(IOReader* f, int io_flags) {
read_ResidualQuantizer(&idxr->rq, f, io_flags);
}
READ1(idxr->code_size);
READVECTOR(idxr->codes);
read_vector(idxr->codes, f);
idx = idxr;
} else if (h == fourcc("IxLS")) {
auto idxr = new IndexLocalSearchQuantizer();
read_index_header(idxr, f);
read_LocalSearchQuantizer(&idxr->lsq, f);
READ1(idxr->code_size);
READVECTOR(idxr->codes);
read_vector(idxr->codes, f);
idx = idxr;
} else if (h == fourcc("IxPR")) {
auto idxpr = new IndexProductResidualQuantizer();
read_index_header(idxpr, f);
read_ProductResidualQuantizer(&idxpr->prq, f, io_flags);
READ1(idxpr->code_size);
READVECTOR(idxpr->codes);
read_vector(idxpr->codes, f);
idx = idxpr;
} else if (h == fourcc("IxPL")) {
auto idxpl = new IndexProductLocalSearchQuantizer();
read_index_header(idxpl, f);
read_ProductLocalSearchQuantizer(&idxpl->plsq, f);
READ1(idxpl->code_size);
READVECTOR(idxpl->codes);
read_vector(idxpl->codes, f);
idx = idxpl;
} else if (h == fourcc("ImRQ")) {
ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
Expand Down Expand Up @@ -789,7 +948,7 @@ Index* read_index(IOReader* f, int io_flags) {
IndexScalarQuantizer* idxs = new IndexScalarQuantizer();
read_index_header(idxs, f);
read_ScalarQuantizer(&idxs->sq, f);
READVECTOR(idxs->codes);
read_vector(idxs->codes, f);
idxs->code_size = idxs->sq.code_size;
idx = idxs;
} else if (h == fourcc("IxLa")) {
Expand Down Expand Up @@ -947,7 +1106,7 @@ Index* read_index(IOReader* f, int io_flags) {
READ1(idxp->code_size_1);
READ1(idxp->code_size_2);
READ1(idxp->code_size);
READVECTOR(idxp->codes);
read_vector(idxp->codes, f);
idx = idxp;
} else if (
h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
Expand Down Expand Up @@ -1071,14 +1230,28 @@ Index* read_index(IOReader* f, int io_flags) {
}

// Read an index from an already opened C stream.
//
// f         stream to read from
// io_flags  bit flags; when all bits of IO_FLAG_MMAP_IFC are set the read
//           goes through a MappedFileIOReader built on a shared
//           MmappedFileMappingOwner, otherwise through a FileIOReader.
// Returns whatever read_index(IOReader*, int) returns for the chosen reader.
//
// NOTE(review): this text comes from a rendered diff without +/- markers.
// The first two statements look like the removed pre-change body that the
// if/else below replaces; read literally they would return before the flag
// test -- confirm against the real file.
Index* read_index(FILE* f, int io_flags) {
FileIOReader reader(f);
return read_index(&reader, io_flags);
// masked comparison: every bit of IO_FLAG_MMAP_IFC must be present
if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) {
// enable mmap-supporting IOReader
auto owner = std::make_shared<MmappedFileMappingOwner>(f);
MappedFileIOReader reader(owner);
return read_index(&reader, io_flags);
} else {
// plain buffered read, data is copied into the index
FileIOReader reader(f);
return read_index(&reader, io_flags);
}
}

// Read an index from the file called fname.
//
// fname     path handed to the reader / mapping owner constructor
// io_flags  bit flags; when all bits of IO_FLAG_MMAP_IFC are set the read
//           goes through a MappedFileIOReader built on a shared
//           MmappedFileMappingOwner, otherwise through a FileIOReader.
// Returns whatever read_index(IOReader*, int) returns for the chosen reader.
//
// NOTE(review): this text comes from a rendered diff without +/- markers.
// The first three statements look like the removed pre-change body that the
// if/else below replaces; read literally they would return before the flag
// test -- confirm against the real file.
Index* read_index(const char* fname, int io_flags) {
FileIOReader reader(fname);
Index* idx = read_index(&reader, io_flags);
return idx;
// masked comparison: every bit of IO_FLAG_MMAP_IFC must be present
if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) {
// enable mmap-supporting IOReader
auto owner = std::make_shared<MmappedFileMappingOwner>(fname);
MappedFileIOReader reader(owner);
return read_index(&reader, io_flags);
} else {
// plain buffered read, data is copied into the index
FileIOReader reader(fname);
Index* idx = read_index(&reader, io_flags);
return idx;
}
}

VectorTransform* read_VectorTransform(const char* fname) {
Expand Down
4 changes: 2 additions & 2 deletions faiss/impl/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

#pragma once

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

#include <faiss/Index.h>

namespace faiss {

struct IOReader {
Expand Down
Loading
Loading