1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Binary indexes (de)serialization"""
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import unittest
import faiss
def make_binary_dataset(d, nb, nt, nq):
    """Generate random binary vectors split into train, base and query sets.

    Every vector holds ``d`` bits packed into ``d // 8`` bytes of dtype
    ``uint8``. The rows are drawn with ``np.random.randint`` in one call and
    then split, in order, into the three sets.

    Args:
        d: vector dimension in bits; must be a multiple of 8.
        nb: number of base (database) vectors.
        nt: number of training vectors.
        nq: number of query vectors.

    Returns:
        A tuple ``(xt, xb, xq)`` of ``uint8`` arrays with shapes
        ``(nt, d // 8)``, ``(nb, d // 8)`` and ``(nq, d // 8)``.

    Raises:
        AssertionError: if ``d`` is not a multiple of 8.
    """
    assert d % 8 == 0
    x = np.random.randint(256, size=(nb + nq + nt, d // 8)).astype('uint8')
    # Split with explicit offsets instead of negative indices: with nq == 0,
    # x[nt:-nq] would be empty and x[-nq:] would be the whole array.
    return x[:nt], x[nt:nt + nb], x[nt + nb:]
class TestBinaryFlat(unittest.TestCase):
    """Serialization round trip for ``faiss.IndexBinaryFlat``."""

    def setUp(self):
        # Build the fixture in setUp (the unittest hook) rather than in an
        # overridden __init__, so a failure here is reported as a test error
        # instead of breaking test collection.
        d = 32
        nt = 0  # test_flat never calls train(), so no training vectors
        nb = 1500
        nq = 500
        (_, self.xb, self.xq) = make_binary_dataset(d, nb, nt, nq)

    def test_flat(self):
        """Search results must be identical before and after a round trip."""
        d = self.xq.shape[1] * 8  # bytes per vector -> bits

        index = faiss.IndexBinaryFlat(d)
        index.add(self.xb)
        D, I = index.search(self.xq, 3)

        index2 = faiss.deserialize_index_binary(faiss.serialize_index_binary(index))
        D2, I2 = index2.search(self.xq, 3)

        # np.testing assertions survive `python -O` and report the
        # mismatching elements, unlike a bare `assert (a == b).all()`.
        np.testing.assert_array_equal(I2, I)
        np.testing.assert_array_equal(D2, D)
class TestBinaryIVF(unittest.TestCase):
    """Serialization round trip for ``faiss.IndexBinaryIVF``."""

    def setUp(self):
        # Build the fixture in setUp (the unittest hook) rather than in an
        # overridden __init__, so a failure here is reported as a test error
        # instead of breaking test collection.
        d = 32
        nt = 200
        nb = 1500
        nq = 500
        (self.xt, self.xb, self.xq) = make_binary_dataset(d, nb, nt, nq)

    def test_ivf_flat(self):
        """Search results must be identical before and after a round trip."""
        d = self.xq.shape[1] * 8  # bytes per vector -> bits

        quantizer = faiss.IndexBinaryFlat(d)
        # NOTE(review): the third argument (8) is presumably the number of
        # inverted lists -- confirm against the faiss API.
        index = faiss.IndexBinaryIVF(quantizer, d, 8)
        index.cp.min_points_per_centroid = 5    # quiet warning
        index.nprobe = 4
        index.train(self.xt)
        index.add(self.xb)
        D, I = index.search(self.xq, 3)

        index2 = faiss.deserialize_index_binary(faiss.serialize_index_binary(index))
        D2, I2 = index2.search(self.xq, 3)

        # np.testing assertions survive `python -O` and report the
        # mismatching elements, unlike a bare `assert (a == b).all()`.
        np.testing.assert_array_equal(I2, I)
        np.testing.assert_array_equal(D2, D)
class TestObjectOwnership(unittest.TestCase):
    """Check that a deserialized binary index is owned by its Python wrapper."""

    def setUp(self):
        # Build the fixture in setUp (the unittest hook) rather than in an
        # overridden __init__, so a failure here is reported as a test error
        # instead of breaking test collection.
        d = 32
        nt = 200
        nb = 1500
        nq = 500
        (self.xt, self.xb, self.xq) = make_binary_dataset(d, nb, nt, nq)

    def test_read_index_ownership(self):
        """The index returned by deserialization must have ``thisown`` set."""
        d = self.xq.shape[1] * 8  # bytes per vector -> bits

        index = faiss.IndexBinaryFlat(d)
        index.add(self.xb)

        # this is the output of read_index_binary (==> checks ownership)
        index2 = faiss.deserialize_index_binary(faiss.serialize_index_binary(index))

        # assertTrue is not stripped under `python -O`, unlike a bare assert.
        self.assertTrue(index2.thisown)
class TestBinaryFromFloat(unittest.TestCase):
    """Serialization round trip for ``faiss.IndexBinaryFromFloat``."""

    def setUp(self):
        # Build the fixture in setUp (the unittest hook) rather than in an
        # overridden __init__, so a failure here is reported as a test error
        # instead of breaking test collection.
        d = 32
        nt = 200
        nb = 1500
        nq = 500
        (self.xt, self.xb, self.xq) = make_binary_dataset(d, nb, nt, nq)

    def test_binary_from_float(self):
        """Search results must be identical before and after a round trip."""
        d = self.xq.shape[1] * 8  # bytes per vector -> bits

        # The binary index wraps a float HNSW index.
        float_index = faiss.IndexHNSWFlat(d, 16)
        index = faiss.IndexBinaryFromFloat(float_index)
        index.add(self.xb)
        D, I = index.search(self.xq, 3)

        index2 = faiss.deserialize_index_binary(faiss.serialize_index_binary(index))
        D2, I2 = index2.search(self.xq, 3)

        # np.testing assertions survive `python -O` and report the
        # mismatching elements, unlike a bare `assert (a == b).all()`.
        np.testing.assert_array_equal(I2, I)
        np.testing.assert_array_equal(D2, D)
class TestBinaryHNSW(unittest.TestCase):
    """Serialization round trips for ``faiss.IndexBinaryHNSW``.

    Covers the HNSW index on its own and as the quantizer of an
    ``IndexBinaryIVF``.
    """

    def setUp(self):
        # Build the fixture in setUp (the unittest hook) rather than in an
        # overridden __init__, so a failure here is reported as a test error
        # instead of breaking test collection.
        d = 32
        nt = 200
        nb = 1500
        nq = 500
        (self.xt, self.xb, self.xq) = make_binary_dataset(d, nb, nt, nq)

    def test_hnsw(self):
        """Standalone HNSW: results must match after a round trip."""
        d = self.xq.shape[1] * 8  # bytes per vector -> bits

        index = faiss.IndexBinaryHNSW(d)
        index.add(self.xb)
        D, I = index.search(self.xq, 3)

        index2 = faiss.deserialize_index_binary(faiss.serialize_index_binary(index))
        D2, I2 = index2.search(self.xq, 3)

        # np.testing assertions survive `python -O` and report the
        # mismatching elements, unlike a bare `assert (a == b).all()`.
        np.testing.assert_array_equal(I2, I)
        np.testing.assert_array_equal(D2, D)

    def test_ivf_hnsw(self):
        """IVF with an HNSW quantizer: results must match after a round trip."""
        d = self.xq.shape[1] * 8  # bytes per vector -> bits

        quantizer = faiss.IndexBinaryHNSW(d)
        # NOTE(review): the third argument (8) is presumably the number of
        # inverted lists -- confirm against the faiss API.
        index = faiss.IndexBinaryIVF(quantizer, d, 8)
        index.cp.min_points_per_centroid = 5    # quiet warning
        index.nprobe = 4
        index.train(self.xt)
        index.add(self.xb)
        D, I = index.search(self.xq, 3)

        index2 = faiss.deserialize_index_binary(faiss.serialize_index_binary(index))
        D2, I2 = index2.search(self.xq, 3)

        np.testing.assert_array_equal(I2, I)
        np.testing.assert_array_equal(D2, D)
|