Skip to content

Commit 4c202db

Browse files
authored
Merge branch 'main' into histogram-neon
2 parents 34bac57 + 2d7dd5b commit 4c202db

File tree

269 files changed

+9660
-3120
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

269 files changed

+9660
-3120
lines changed

.circleci/config.yml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,29 +45,31 @@ jobs:
4545
- checkout
4646
- run:
4747
name: Build faiss library
48+
no_output_timeout: 30m
4849
command: |
4950
cmake -B build -DBUILD_TESTING=ON -DFAISS_ENABLE_GPU=OFF \
5051
-DFAISS_OPT_LEVEL=<< parameters.opt_level >> \
5152
-DFAISS_ENABLE_C_API=ON \
5253
-DCMAKE_BUILD_TYPE=Release -DBLA_VENDOR=Intel10_64_dyn .
53-
make -k -C build -j3 faiss
54+
make -k -C build -j$(nproc) faiss
5455
- when:
5556
condition:
5657
equal: [ "avx2", << parameters.opt_level >> ]
5758
steps:
5859
- run:
5960
name: Build faiss_avx2 library
60-
command: make -k -C build -j3 faiss_avx2 swigfaiss_avx2
61+
no_output_timeout: 30m
62+
command: make -k -C build -j$(nproc) faiss_avx2 swigfaiss_avx2
6163
- run:
6264
name: Test faiss library
6365
command: |
64-
make -C build -j3 faiss_test
66+
make -C build -j$(nproc) faiss_test
6567
export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/"
6668
make -C build test
6769
- run:
6870
name: Build python extension
6971
command: |
70-
make -C build -j3 swigfaiss
72+
make -C build -j$(nproc) swigfaiss
7173
cd build/faiss/python
7274
python3 setup.py build
7375
- run:
@@ -87,10 +89,12 @@ jobs:
8789
build_linux_conda:
8890
docker:
8991
- image: continuumio/miniconda3
92+
resource_class: medium+
9093
steps:
9194
- checkout
9295
- run:
9396
name: Conda build
97+
no_output_timeout: 30m
9498
command: |
9599
conda install -y -q conda-build
96100
cd conda
@@ -166,17 +170,17 @@ jobs:
166170
-DFAISS_ENABLE_C_API=ON \
167171
-DCMAKE_BUILD_TYPE=Release \
168172
-DPython_EXECUTABLE=$PYTHON .
169-
make -k -C build -j3 faiss
173+
make -k -C build -j$(nproc) faiss
170174
- run:
171175
name: Test faiss library
172176
command: |
173-
make -C build -j3 faiss_test
177+
make -C build -j$(nproc) faiss_test
174178
export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/"
175179
make -C build test
176180
- run:
177181
name: Build python extension
178182
command: |
179-
make -C build -j3 swigfaiss
183+
make -C build -j$(nproc) swigfaiss
180184
cd build/faiss/python
181185
$PYTHON setup.py build
182186
- run:
@@ -217,6 +221,7 @@ jobs:
217221
default: main
218222
docker:
219223
- image: continuumio/miniconda3
224+
resource_class: medium+
220225
steps:
221226
- checkout
222227
- run:
@@ -226,6 +231,7 @@ jobs:
226231
conda config --set anaconda_upload yes
227232
- run:
228233
name: Build packages
234+
no_output_timeout: 30m
229235
environment:
230236
PACKAGE_TYPE: <<parameters.label>>
231237
command: |

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ ENV LD_PRELOAD /usr/lib64/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:
1313
# Install necessary build tools
1414
RUN yum install -y gcc-c++ make swig3
1515

16-
# Install necesary headers/libs
16+
# Install necessary headers/libs
1717
RUN yum install -y python-devel numpy
1818

1919
COPY . /opt/faiss

Doxyfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,7 @@ EXCLUDE_SYMLINKS = NO
786786
# Note that the wildcards are matched against the file with absolute path, so to
787787
# exclude all test directories for example use the pattern */test/*
788788

789-
EXCLUDE_PATTERNS = */impl/*
789+
EXCLUDE_PATTERNS =
790790

791791
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
792792
# (namespaces, classes, functions, etc.) that should be excluded from the

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ For public discussion of Faiss or for questions, there is a Facebook group at ht
7474
We monitor the [issues page](http://github.com/facebookresearch/faiss/issues) of the repository.
7575
You can report bugs, ask questions, etc.
7676

77-
## License
77+
## Legal
7878

79-
Faiss is MIT-licensed.
79+
Faiss is MIT-licensed; refer to the [LICENSE file](https://github.com/facebookresearch/faiss/blob/main/LICENSE) in the top-level directory.
80+
81+
Copyright © Meta Platforms, Inc. See the [Terms of Use](https://opensource.fb.com/legal/terms/) and [Privacy Policy](https://opensource.fb.com/legal/privacy/) for this project.

benchs/bench_all_ivf/bench_all_ivf.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6+
import argparse
67
import os
78
import sys
89
import time
9-
import pdb
10-
import numpy as np
10+
1111
import faiss
12-
import argparse
13-
import datasets
14-
from datasets import sanitize
12+
import numpy as np
13+
14+
try:
15+
import datasets_fb as datasets
16+
except ModuleNotFoundError:
17+
import datasets_oss as datasets
18+
19+
sanitize = datasets.sanitize
1520

1621

1722
######################################################
@@ -262,8 +267,7 @@ def apply_AQ_options(index, args):
262267
print("Getting centroids from", args.get_centroids_from)
263268
src_index = faiss.read_index(args.get_centroids_from)
264269
src_quant = faiss.downcast_index(src_index.quantizer)
265-
centroids = faiss.vector_to_array(src_quant.xb)
266-
centroids = centroids.reshape(-1, d)
270+
centroids = src_quant.reconstruct_n()
267271
print(" centroid table shape", centroids.shape)
268272

269273
if isinstance(vec_transform, faiss.VectorTransform):
@@ -333,7 +337,7 @@ def apply_AQ_options(index, args):
333337

334338
xq = sanitize(ds.get_queries())
335339
gt = ds.get_groundtruth(k=args.k)
336-
assert gt.shape[1] == args.k, pdb.set_trace()
340+
assert gt.shape[1] == args.k
337341

338342
if args.searchthreads != -1:
339343
print("Setting nb of threads to", args.searchthreads)

benchs/bench_all_ivf/cmp_with_scann.py

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -75,28 +75,37 @@ def aa(*args, **kwargs):
7575
k = args.k
7676
nrun = args.nrun
7777

78-
if args.lib == "faiss":
78+
if not os.path.exists(cache_dir + "xb.npy"):
7979
# prepare cache
80-
import faiss
8180
from datasets import load_dataset
82-
8381
ds = load_dataset(args.db, download=args.download)
8482
print(ds)
85-
if not os.path.exists(cache_dir + "xb.npy"):
86-
# store for SCANN
87-
os.system(f"rm -rf {cache_dir}; mkdir -p {cache_dir}")
88-
tosave = dict(
89-
# xt = ds.get_train(10),
90-
xb = ds.get_database(),
91-
xq = ds.get_queries(),
92-
gt = ds.get_groundtruth()
93-
)
94-
for name, v in tosave.items():
95-
fname = cache_dir + "/" + name + ".npy"
96-
print("save", fname)
97-
np.save(fname, v)
98-
99-
open(cache_dir + "metric", "w").write(ds.metric)
83+
# store for SCANN
84+
os.system(f"rm -rf {cache_dir}; mkdir -p {cache_dir}")
85+
tosave = dict(
86+
xb = ds.get_database(),
87+
xq = ds.get_queries(),
88+
gt = ds.get_groundtruth()
89+
)
90+
for name, v in tosave.items():
91+
fname = cache_dir + "/" + name + ".npy"
92+
print("save", fname)
93+
np.save(fname, v)
94+
95+
open(cache_dir + "metric", "w").write(ds.metric)
96+
97+
dataset = {}
98+
for kn in "xb xq gt".split():
99+
fname = cache_dir + "/" + kn + ".npy"
100+
print("load", fname)
101+
dataset[kn] = np.load(fname)
102+
xb = dataset["xb"]
103+
xq = dataset["xq"]
104+
gt = dataset["gt"]
105+
distance_measure = open(cache_dir + "metric").read()
106+
107+
if args.lib == "faiss":
108+
import faiss
100109

101110
name1_to_metric = {
102111
"IP": faiss.METRIC_INNER_PRODUCT,
@@ -106,14 +115,10 @@ def aa(*args, **kwargs):
106115
index_fname = cache_dir + "index.faiss"
107116
if not os.path.exists(index_fname):
108117
index = faiss_make_index(
109-
ds.get_database(), name1_to_metric[ds.metric], index_fname)
118+
xb, name1_to_metric[distance_measure], index_fname)
110119
else:
111120
index = faiss.read_index(index_fname)
112121

113-
xb = ds.get_database()
114-
xq = ds.get_queries()
115-
gt = ds.get_groundtruth()
116-
117122
faiss_eval_search(
118123
index, xq, xb, nprobe_tab, pre_reorder_k_tab, k, gt,
119124
nrun, args.measure
@@ -122,32 +127,22 @@ def aa(*args, **kwargs):
122127
if args.lib == "scann":
123128
from scann.scann_ops.py import scann_ops_pybind
124129

125-
dataset = {}
126-
for kn in "xb xq gt".split():
127-
fname = cache_dir + "/" + kn + ".npy"
128-
print("load", fname)
129-
dataset[kn] = np.load(fname)
130130
name1_to_name2 = {
131131
"IP": "dot_product",
132132
"L2": "squared_l2"
133133
}
134-
distance_measure = name1_to_name2[open(cache_dir + "metric").read()]
135-
136-
xb = dataset["xb"]
137-
xq = dataset["xq"]
138-
gt = dataset["gt"]
139134

140135
scann_dir = cache_dir + "/scann1.1.1_serialized"
141136
if os.path.exists(scann_dir + "/scann_config.pb"):
142137
searcher = scann_ops_pybind.load_searcher(scann_dir)
143138
else:
144-
searcher = scann_make_index(xb, distance_measure, scann_dir, 0)
139+
searcher = scann_make_index(xb, name1_to_name2[distance_measure], scann_dir, 0)
145140

146141
scann_dir = cache_dir + "/scann1.1.1_serialized_reorder"
147142
if os.path.exists(scann_dir + "/scann_config.pb"):
148143
searcher_reo = scann_ops_pybind.load_searcher(scann_dir)
149144
else:
150-
searcher_reo = scann_make_index(xb, distance_measure, scann_dir, 100)
145+
searcher_reo = scann_make_index(xb, name1_to_name2[distance_measure], scann_dir, 100)
151146

152147
scann_eval_search(
153148
searcher, searcher_reo,
@@ -256,7 +251,6 @@ def faiss_make_index(xb, metric_type, fname):
256251
# index.by_residual = False
257252

258253
print("train")
259-
# index.train(ds.get_train())
260254
index.train(xb[:250000])
261255
print("add")
262256
index.add(xb)

benchs/bench_all_ivf/datasets.py renamed to benchs/bench_all_ivf/datasets_oss.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ def load_dataset(dataset='deep1M', compute_gt=False, download=False):
8181
f"{centdir}/clustering.dbdeep1M.IVF{ncent}.faissindex"
8282
)
8383

84-
8584
elif dataset.startswith("deep"):
8685

8786
szsuf = dataset[4:]

benchs/bench_big_batch_ivf.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# This source code is licensed under the MIT license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import argparse
7+
import time
8+
9+
import faiss
10+
11+
import numpy as np
12+
13+
from faiss.contrib.datasets import SyntheticDataset
14+
from faiss.contrib.ivf_tools import big_batch_search
15+
16+
# Command-line interface of the benchmark. Options are registered in two
# help groups: dataset options and index options.
parser = argparse.ArgumentParser()


def aa(*args, **kwargs):
    """Register an option on the current module-level argument ``group``.

    All positional and keyword arguments are forwarded unchanged to
    ``group.add_argument``. ``group`` is looked up at call time, so
    rebinding it below switches the help section that receives the
    following options.
    """
    group.add_argument(*args, **kwargs)


group = parser.add_argument_group('dataset options')
aa('--dim', type=int, default=64)
aa('--size', default="S")

group = parser.add_argument_group('index options')
aa('--nlist', type=int, default=100)
aa('--factory_string', default="", help="overrides nlist")
aa('--k', type=int, default=10)
aa('--nprobe', type=int, default=5)
aa('--nt', type=int, default=-1, help="nb search threads")
aa('--method', default="pairwise_distances", help="")

# Parsing happens at import time: this file is a script, not a library.
args = parser.parse_args()
print("args:", args)
37+
38+
if args.size == "S":
39+
ds = SyntheticDataset(32, 2000, 4000, 1000)
40+
elif args.size == "M":
41+
ds = SyntheticDataset(32, 20000, 40000, 10000)
42+
elif args.size == "L":
43+
ds = SyntheticDataset(32, 200000, 400000, 100000)
44+
else:
45+
raise RuntimeError(f"dataset size {args.size} not supported")
46+
47+
nlist = args.nlist
48+
nprobe = args.nprobe
49+
k = args.k
50+
51+
52+
def tic(name):
    """Start a named timer; the matching ``toc()`` reports the elapsed time.

    Stores ``(name, start_time)`` in the module-level ``tictoc`` variable,
    overwriting any previous timer, so only one timer is active at a time.
    The name is printed followed by a carriage return instead of a newline
    and stdout is flushed, so the label is visible while the step runs.
    """
    global tictoc
    tictoc = (name, time.time())
    print(name, end="\r", flush=True)
56+
57+
58+
def toc():
    """Report and return the time elapsed since the last ``tic()``.

    Reads the ``(name, start_time)`` pair stored in the module-level
    ``tictoc`` variable, prints ``"<name>: <seconds> s"`` with three
    decimals, and returns the elapsed wall-clock time in seconds.

    Raises:
        NameError: if ``tictoc`` has not been set yet (no prior ``tic()``).
    """
    name, t0 = tictoc
    dt = time.time() - t0
    print(f"{name}: {dt:.3f} s")
    return dt
64+
65+
66+
print(f"dataset {ds}, {nlist=:} {nprobe=:} {k=:}")

# An explicit --factory_string takes precedence over --nlist.
if args.factory_string == "":
    factory_string = f"IVF{nlist},Flat"
else:
    factory_string = args.factory_string

print(f"instantiate {factory_string}")
index = faiss.index_factory(ds.d, factory_string)

if args.factory_string != "":
    # The factory string decided the number of lists; read it back from
    # the index so the value reported below is the real one.
    nlist = index.nlist

print("nlist", nlist)

tic("train")
index.train(ds.get_train())
toc()

tic("add")
index.add(ds.get_database())
toc()

if args.nt != -1:
    print("setting nb of threads to", args.nt)
    faiss.omp_set_num_threads(args.nt)

# Baseline: a regular index.search call over all queries.
tic("reference search")
index.nprobe = nprobe
Dref, Iref = index.search(ds.get_queries(), k)
t_ref = toc()

# Candidate: the same queries through big_batch_search.
tic("block search")
Dnew, Inew = big_batch_search(
    index, ds.get_queries(),
    k, method=args.method, verbose=10
)
t_tot = toc()

# Sanity check before reporting the speedup: fewer than 0.01% of the
# returned ids may differ, and distances must agree to 4 decimals.
assert (Inew != Iref).sum() / Iref.size < 1e-4
np.testing.assert_almost_equal(Dnew, Dref, decimal=4)

print(f"total block search time {t_tot:.3f} s, speedup {t_ref / t_tot:.3f}x")

0 commit comments

Comments
 (0)