facebookresearch
diff --git a/‎demos/demo_qinco.py‎
Lines changed: 77 additions & 0 deletions b/‎demos/demo_qinco.py‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎faiss/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎faiss/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎faiss/python/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎faiss/python/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎faiss/python/class_wrappers.py‎
Lines changed: 130 additions & 0 deletions b/‎faiss/python/class_wrappers.py‎
Lines changed: 130 additions & 0 deletions
diff --git a/‎faiss/python/swigfaiss.swig‎
Lines changed: 4 additions & 1 deletion b/‎faiss/python/swigfaiss.swig‎
Lines changed: 4 additions & 1 deletion
@@ -0,0 +1,77 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+This demonstrates how to reproduce the QINCo paper results using the Faiss 
+QINCo implementation. The code loads the reference model because training is not 
+implemented in Faiss.
+
+Prepare the data with
+
+cd /tmp
+
+# get the reference qinco code
+git clone https://github.com/facebookresearch/Qinco.git
+
+# get the data
+wget https://dl.fbaipublicfiles.com/QINCo/datasets/bigann/bigann1M.bvecs
+
+# get the model
+wget https://dl.fbaipublicfiles.com/QINCo/models/bigann_8x8_L2.pt
+
+"""
+
+import numpy as np
+from faiss.contrib.vecs_io import bvecs_mmap
+import sys
+import time
+import torch
+import faiss
+
+# make sure pickle deserialization will work
+sys.path.append("/tmp/Qinco")
+import model_qinco
+
+with torch.no_grad():
+
+    # SECURITY: the checkpoint is a pickled model object, so loading it can
+    # execute arbitrary code from the file -- only load checkpoints you trust.
+    # weights_only=False is spelled out because PyTorch >= 2.6 defaults to
+    # weights_only=True, which refuses to unpickle a full model object.
+    qinco = torch.load("/tmp/bigann_8x8_L2.pt", weights_only=False)
+    qinco.eval()
+    # print(qinco)
+
+    # set to False to time the multi-threaded code paths instead
+    single_thread = True
+    if single_thread:
+        torch.set_num_threads(1)
+        faiss.omp_set_num_threads(1)
+
+    # first 1000 database vectors, divided by the model's db_scale
+    x_base = bvecs_mmap("/tmp/bigann1M.bvecs")[:1000].astype('float32')
+    x_scaled = torch.from_numpy(x_base) / qinco.db_scale
+
+    # reference run with the PyTorch model
+    # (the timed section covers both encode and decode)
+    t0 = time.time()
+    codes, _ = qinco.encode(x_scaled)
+    x_decoded_scaled = qinco.decode(codes)
+    print(f"Pytorch encode {time.time() - t0:.3f} s")
+    # multi-thread: 1.13s, single-thread: 7.744
+
+    # undo the scaling before measuring the reconstruction error
+    x_decoded = x_decoded_scaled.numpy() * qinco.db_scale
+
+    err = ((x_decoded - x_base) ** 2).sum(1).mean()
+    print("MSE=", err)  # = 14211.956, near the L=2 result in Fig 4 of the paper
+
+    # same experiment with the Faiss model built from the PyTorch one
+    # (the timed section again covers both encode and decode)
+    qinco2 = faiss.QINCo(qinco)
+    t0 = time.time()
+    codes2 = qinco2.encode(faiss.Tensor2D(x_scaled))
+    x_decoded2 = qinco2.decode(codes2).numpy() * qinco.db_scale
+    print(f"Faiss encode {time.time() - t0:.3f} s")
+    # multi-thread: 3.2s, single thread: 7.019
+
+    # these tests don't work because there are outlier encodings
+    # np.testing.assert_array_equal(codes.numpy(), codes2.numpy())
+    # np.testing.assert_allclose(x_decoded, x_decoded2)
+
+    # instead, require that fewer than 1% of the code entries differ ...
+    ndiff = (codes.numpy() != codes2.numpy()).sum() / codes.numel()
+    assert ndiff < 0.01
+    # ... and that fewer than 1% of the vectors decode differently
+    ndiff = (((x_decoded - x_decoded2) ** 2).sum(1) > 1e-5).sum()
+    assert ndiff / len(x_base) < 0.01
+
+    err = ((x_decoded2 - x_base) ** 2).sum(1).mean()
+    print("MSE=", err)  # = 14213.551
@@ -81,6 +81,7 @@ set(FAISS_SRC
   invlists/InvertedLists.cpp
   invlists/InvertedListsIOHook.cpp
   utils/Heap.cpp
+  utils/NeuralNet.cpp
   utils/WorkerThread.cpp
   utils/distances.cpp
   utils/distances_simd.cpp
 
@@ -44,6 +44,14 @@
 class_wrappers.handle_IDSelectorSubset(IDSelectorBitmap, class_owns=False, force_int64=False)
 class_wrappers.handle_CodeSet(CodeSet)
 
+class_wrappers.handle_Tensor2D(Tensor2D)
+class_wrappers.handle_Tensor2D(Int32Tensor2D)
+class_wrappers.handle_Embedding(Embedding)
+class_wrappers.handle_Linear(Linear)
+class_wrappers.handle_QINCo(QINCo)
+class_wrappers.handle_QINCoStep(QINCoStep)
+
+
 this_module = sys.modules[__name__]
 
 # handle sub-classes
 
@@ -1247,3 +1247,133 @@ def replacement_insert(self, codes, inserted=None):
         return inserted
 
     replace_method(the_class, 'insert', replacement_insert)
+
+######################################################
+# Syntactic sugar for NeuralNet classes
+######################################################
+
+def handle_Tensor2D(the_class):
+    """ Add array interoperability to a Tensor2D-like wrapper class.
+
+    The constructor additionally accepts a single argument exposing a
+    2-entry .shape, whose contents are copied into the tensor. A numpy()
+    method is added that returns the contents reshaped to the tensor's
+    shape. Any other constructor call is forwarded unchanged.
+    """
+    the_class.original_init = the_class.__init__
+
+    def replacement_init(self, *args):
+        if len(args) != 1:
+            # regular constructor signature: forward as-is
+            self.original_init(*args)
+            return
+        # single argument: treat it as a 2D array to copy from
+        src = args[0]
+        nrow, ncol = src.shape
+        self.original_init(nrow, ncol)
+        flat = np.ascontiguousarray(src).ravel()
+        faiss.copy_array_to_vector(flat, self.v)
+
+    def numpy(self):
+        """ return the tensor contents as a 2D numpy array """
+        dims = np.zeros(2, dtype=np.int64)
+        # copy dims.nbytes bytes from self.shape into the numpy buffer
+        # (assumes the shape entries are 64-bit integers -- TODO confirm)
+        faiss.memcpy(faiss.swig_ptr(dims), self.shape, dims.nbytes)
+        nrow, ncol = dims
+        return faiss.vector_to_array(self.v).reshape(nrow, ncol)
+
+    the_class.numpy = numpy
+    the_class.__init__ = replacement_init
+
+
+def handle_Embedding(the_class):
+    """ Let the wrapped Embedding class be built from a torch embedding.
+
+    When the constructor receives a single argument that is not an instance
+    of the wrapped class, the argument is assumed to be a torch.Embedding:
+    the object is sized from its num_embeddings / embedding_dim and the
+    weights are copied over. Every other call is forwarded unchanged to the
+    original constructor. A from_torch() method is also added to the class.
+    """
+    the_class.original_init = the_class.__init__
+
+    def replacement_init(self, *args):
+        # isinstance rather than an exact class comparison, so that
+        # instances of subclasses are not mistaken for torch modules
+        if len(args) != 1 or isinstance(args[0], the_class):
+            self.original_init(*args)
+            return
+        # assume it's a torch.Embedding
+        emb = args[0]
+        self.original_init(emb.num_embeddings, emb.embedding_dim)
+        self.from_torch(emb)
+
+    def from_torch(self, emb):
+        """ copy weights from torch.Embedding
+
+        The weight shape must be (num_embeddings, embedding_dim) of self.
+        """
+        assert emb.weight.shape == (self.num_embeddings, self.embedding_dim)
+        faiss.copy_array_to_vector(
+            np.ascontiguousarray(emb.weight.data).ravel(), self.weight)
+
+    the_class.from_torch = from_torch
+    the_class.__init__ = replacement_init
+
+
+def handle_Linear(the_class):
+    """ Let the wrapped Linear class be built from a torch linear layer.
+
+    When the constructor receives a single argument that is not an instance
+    of the wrapped class, the argument is assumed to be a torch.Linear: the
+    object is sized from its in_features / out_features (with a bias only
+    if the torch layer has one) and the parameters are copied over. Every
+    other call is forwarded unchanged to the original constructor. A
+    from_torch() method is also added to the class.
+    """
+    the_class.original_init = the_class.__init__
+
+    def replacement_init(self, *args):
+        # isinstance rather than an exact class comparison, so that
+        # instances of subclasses are not mistaken for torch modules
+        if len(args) != 1 or isinstance(args[0], the_class):
+            self.original_init(*args)
+            return
+        # assume it's a torch.Linear
+        linear = args[0]
+        bias = linear.bias is not None
+        self.original_init(linear.in_features, linear.out_features, bias)
+        self.from_torch(linear)
+
+    def from_torch(self, linear):
+        """ copy weights (and bias, when present) from torch.Linear
+
+        The weight shape must be (out_features, in_features) of self.
+        """
+        assert linear.weight.shape == (self.out_features, self.in_features)
+        faiss.copy_array_to_vector(
+            linear.weight.data.numpy().ravel(), self.weight)
+        if linear.bias is not None:
+            assert linear.bias.shape == (self.out_features,)
+            faiss.copy_array_to_vector(linear.bias.data.numpy(), self.bias)
+
+    the_class.__init__ = replacement_init
+    the_class.from_torch = from_torch
+
+######################################################
+# Syntactic sugar for QINCo and QINCoStep
+######################################################
+
+def handle_QINCoStep(the_class):
+    """ Let the wrapped QINCoStep class be built from a Torch QINCoStep.
+
+    When the constructor receives a single argument that is not an instance
+    of the wrapped class, the argument is assumed to be a Torch QINCoStep:
+    the object is built from its (d, K, L, h) and the parameters are copied
+    over. Every other call is forwarded unchanged to the original
+    constructor. A from_torch() method is also added to the class.
+    """
+    the_class.original_init = the_class.__init__
+
+    def replacement_init(self, *args):
+        # isinstance rather than an exact class comparison, so that
+        # instances of subclasses are not mistaken for torch modules
+        if len(args) != 1 or isinstance(args[0], the_class):
+            self.original_init(*args)
+            return
+        step = args[0]
+        # assume it's a Torch QINCoStep
+        self.original_init(step.d, step.K, step.L, step.h)
+        self.from_torch(step)
+
+    def from_torch(self, step):
+        """ copy weights from torch.QINCoStep
+
+        The (d, K, L, h) of step must match those of self.
+        """
+        assert (step.d, step.K, step.L, step.h) == (self.d, self.K, self.L, self.h)
+        self.codebook.from_torch(step.codebook)
+        self.MLPconcat.from_torch(step.MLPconcat)
+
+        for i in range(step.L):
+            src = step.residual_blocks[i]
+            dest = self.get_residual_block(i)
+            # entries 0 and 2 of the torch block feed linear1 / linear2
+            # (entry 1 is presumably the activation in between -- TODO confirm)
+            dest.linear1.from_torch(src[0])
+            dest.linear2.from_torch(src[2])
+
+    the_class.__init__ = replacement_init
+    the_class.from_torch = from_torch
+
+
+def handle_QINCo(the_class):
+    """ Let the wrapped QINCo class be built from a Torch QINCo model.
+
+    When the constructor receives a single argument that is not an instance
+    of the wrapped class, the argument is assumed to be a Torch QINCo: the
+    object is built from its (d, K, L, M, h) and the parameters are copied
+    over. Every other call is forwarded unchanged to the original
+    constructor. A from_torch() method is also added to the class.
+    """
+    the_class.original_init = the_class.__init__
+
+    def replacement_init(self, *args):
+        # isinstance rather than an exact class comparison, so that
+        # instances of subclasses are not mistaken for torch modules
+        if len(args) != 1 or isinstance(args[0], the_class):
+            self.original_init(*args)
+            return
+
+        # assume it's a Torch QINCo
+        qinco = args[0]
+        self.original_init(qinco.d, qinco.K, qinco.L, qinco.M, qinco.h)
+        self.from_torch(qinco)
+
+    def from_torch(self, qinco):
+        """ copy weights from torch.QINCo
+
+        The (d, K, L, M, h) of qinco must match those of self.
+        """
+        assert (
+            (qinco.d, qinco.K, qinco.L, qinco.M, qinco.h) ==
+            (self.d, self.K, self.L, self.M, self.h)
+        )
+        self.codebook0.from_torch(qinco.codebook0)
+        # only M - 1 steps are copied; presumably codebook0 accounts for the
+        # first of the M levels -- TODO confirm
+        for m in range(qinco.M - 1):
+            self.get_step(m).from_torch(qinco.steps[m])
+
+    the_class.__init__ = replacement_init
+    the_class.from_torch = from_torch
@@ -145,6 +145,7 @@ typedef uint64_t size_t;
 #include <faiss/impl/LocalSearchQuantizer.h>
 #include <faiss/impl/ProductAdditiveQuantizer.h>
 #include <faiss/impl/CodePacker.h>
+#include <faiss/utils/NeuralNet.h>
 
 #include <faiss/invlists/BlockInvertedLists.h>
 
@@ -257,7 +258,6 @@ namespace std {
 %template(ClusteringIterationStatsVector) std::vector<faiss::ClusteringIterationStats>;
 %template(ParameterRangeVector) std::vector<faiss::ParameterRange>;
 
-
 #ifndef SWIGWIN
 %template(OnDiskOneListVector) std::vector<faiss::OnDiskOneList>;
 #endif // !SWIGWIN
@@ -530,6 +530,9 @@ struct faiss::simd16uint16 {};
 
 %include  <faiss/IndexRowwiseMinMax.h>
 
+%include <faiss/utils/NeuralNet.h>
+%template(Tensor2D) faiss::nn::Tensor2DTemplate<float>;
+%template(Int32Tensor2D) faiss::nn::Tensor2DTemplate<int32_t>;
 
 %ignore faiss::BufferList::Buffer;
 %ignore faiss::RangeSearchPartialResult::QueryResult;