Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 0 additions & 58 deletions demos/offline_ivf/offline_ivf.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,64 +227,6 @@ def _iterate_transformed(self, ds, start, batch_size, dt):
for buffer in ds.iterate(start, batch_size, dt):
yield buffer

def index_shard_and_quantize(self):
    """Build each missing index shard and dump its coarse-quantization ids.

    The index template is read from ``self.index_template_file`` and its
    IVF sub-index is extracted.  For every shard number in
    ``range(self.nshards)`` the method:

    * skips the shard when either the shard file
      (``self.index_shard_prefix`` + number) or the coarse-quantization
      file (``self.coarse_quantization_prefix`` + number) already exists;
    * otherwise creates the coarse-quantization file in exclusive mode,
      searches each batch of ``self.xb_ds`` against the coarse quantizer
      with ``self.nprobe`` probes, appends the resulting id matrix to that
      file and adds the batch to the index via
      ``self._index_add_core_wrapper`` using the first id column;
    * finally writes the populated index to the shard file.

    If processing a shard fails after its coarse-quantization file has been
    created, that file is removed before the error propagates, so that the
    shard is not skipped forever on later runs.

    Raises:
        AssertionError: if the template file is missing, if ``self.nprobe``
            exceeds the number of vectors in the coarse quantizer, or if a
            batch violates one of the sanity checks below.
    """
    assert os.path.exists(self.index_template_file)
    index = faiss.read_index(self.index_template_file)
    index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
    assert self.nprobe <= index_ivf.quantizer.ntotal, (
        f"the number of vectors {index_ivf.quantizer.ntotal} is not enough"
        f" to retrieve {self.nprobe} neighbours, check."
    )

    # Dimension each batch must have: the output of the first transform in
    # the chain for a pre-transform index, the raw input dimension otherwise.
    if is_pretransform_index(index):
        d = index.chain.at(0).d_out
    else:
        d = self.input_d

    # The GPU clone of the coarse quantizer does not depend on the shard, so
    # it is created once, and only if at least one shard really needs work.
    quantizer = None

    for i in range(self.nshards):
        sfn = f"{self.index_shard_prefix}{i}"
        cqfn = f"{self.coarse_quantization_prefix}{i}"  # FIXME (kept from original)
        if os.path.exists(sfn) or os.path.exists(cqfn):
            logging.info(f"skipping shard: {i}")
            continue

        # Exclusive creation is what claims the shard: if the file appeared
        # between the check above and this call, the shard is skipped.  Only
        # the open() is guarded so that a FileExistsError raised by the work
        # below is not mistaken for "shard already taken".
        try:
            cqf = open(cqfn, "xb")
        except FileExistsError:
            logging.info(f"skipping shard: {i}")
            continue

        try:
            with cqf:
                index.reset()
                start = i * self.shard_size
                j = 0  # number of vectors of this shard consumed so far
                if quantizer is None:
                    quantizer = faiss.index_cpu_to_all_gpus(
                        index_ivf.quantizer
                    )
                for xb_j in tqdm(
                    self._iterate_transformed(
                        self.xb_ds,
                        start,
                        EMBEDDINGS_BATCH_SIZE,
                        np.float32,
                    ),
                    file=sys.stdout,
                ):
                    assert xb_j.shape[1] == d
                    _, I = quantizer.search(xb_j, self.nprobe)
                    # Every returned id must be a valid inverted-list number.
                    assert np.amin(I) >= 0, f"{I}"
                    assert np.amax(I) < index_ivf.nlist
                    cqf.write(I)
                    self._index_add_core_wrapper(  # FIXME (kept from original)
                        index_ivf,
                        xb_j,
                        np.arange(start + j, start + j + xb_j.shape[0]),
                        I[:, 0],
                    )
                    j += xb_j.shape[0]
                    assert j <= self.shard_size
                    if j == self.shard_size:
                        break
            logging.info(f"writing {sfn}...")
            faiss.write_index(index, sfn)
        except BaseException:
            # The coarse-quantization file doubles as the "shard claimed"
            # marker; leaving it behind after a failure would make every
            # later run skip this shard.  Remove it, then re-raise unchanged.
            try:
                os.remove(cqfn)
            except OSError:
                logging.warning(
                    f"could not remove incomplete file {cqfn}"
                )
            raise
    logging.info("done")

def index_shard(self):
assert os.path.exists(self.index_template_file)
index = faiss.read_index(self.index_template_file)
Expand Down