From c566bdc3b9edd0aba61e130a7a89853d7d129aa7 Mon Sep 17 00:00:00 2001 From: Kumar Saurabh Arora Date: Fri, 31 May 2024 10:26:52 -0700 Subject: [PATCH] Adding buck target for experiment bench_fw_ivf (#3423) Summary: Small fixes to allow running experiments from fbcode. 1. Added a buck target. 2. Switched to the full import path for the faiss bench_fw modules. 3. Added a new dataset path for running tests locally, since the existing ./data directory cannot be used in fbcode. Reviewed By: algoriddle, junjieqi Differential Revision: D57235092 --- benchs/bench_fw_ivf.py | 13 +++++++++---- contrib/datasets.py | 6 +++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/benchs/bench_fw_ivf.py b/benchs/bench_fw_ivf.py index 8c84743e27..e9e144c569 100644 --- a/benchs/bench_fw_ivf.py +++ b/benchs/bench_fw_ivf.py @@ -3,16 +3,20 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -import logging import argparse +import logging import os -from bench_fw.benchmark import Benchmark -from bench_fw.benchmark_io import BenchmarkIO -from bench_fw.descriptors import DatasetDescriptor, IndexDescriptor +from faiss.benchs.bench_fw.benchmark import Benchmark +from faiss.benchs.bench_fw.benchmark_io import BenchmarkIO +from faiss.benchs.bench_fw.descriptors import ( + DatasetDescriptor, + IndexDescriptor, +) logging.basicConfig(level=logging.INFO) + def sift1M(bio): benchmark = Benchmark( num_threads=32, @@ -37,6 +41,7 @@ def sift1M(bio): benchmark.set_io(bio) benchmark.benchmark(result_file="result.json", local=False, train=True, reconstruct=False, knn=True, range=False) + def bigann(bio): for scale in [1, 2, 5, 10, 20, 50]: benchmark = Benchmark( diff --git a/contrib/datasets.py b/contrib/datasets.py index f37a2fb6e4..281f16e2fa 100644 --- a/contrib/datasets.py +++ b/contrib/datasets.py @@ -6,6 +6,8 @@ import os import numpy as np import faiss +import getpass + from .vecs_io import fvecs_read, ivecs_read, bvecs_mmap, fvecs_mmap from 
.exhaustive_search import knn @@ -115,10 +117,12 @@ def get_groundtruth(self, k=100): # that directory is ############################################################################ +username = getpass.getuser() for dataset_basedir in ( '/datasets01/simsearch/041218/', - '/mnt/vol/gfsai-flash3-east/ai-group/datasets/simsearch/'): + '/mnt/vol/gfsai-flash3-east/ai-group/datasets/simsearch/', + f'/home/{username}/simsearch/data/'): if os.path.exists(dataset_basedir): break else: