1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
|
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c -*-
#ifndef FAISS_INDEX_IVF_C_H
#define FAISS_INDEX_IVF_C_H
#include "Clustering_c.h"
#include "Index_c.h"
#include "faiss_c.h"
#include "impl/AuxIndexStructures_c.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Search parameters for IVF indexes, declared as a handle type derived
* from SearchParameters.
*
* NOTE(review): the FAISS_DECLARE_* macros are defined outside this file;
* presumably they introduce the FaissSearchParametersIVF handle type used
* below, its destructor, and a downcast helper from the parent handle --
* confirm against the macro definitions.
*/
FAISS_DECLARE_CLASS_INHERITED(SearchParametersIVF, SearchParameters)
FAISS_DECLARE_DESTRUCTOR(SearchParametersIVF)
FAISS_DECLARE_SEARCH_PARAMETERS_DOWNCAST(SearchParametersIVF)
/** Create a new SearchParametersIVF object and return its handle through
* p_sp.
*
* @param p_sp  output location receiving the new handle
* @return status code; NOTE(review): presumably 0 on success and non-zero
* on error -- confirm against the library's error convention
*/
int faiss_SearchParametersIVF_new(FaissSearchParametersIVF** p_sp);
/** Create a new SearchParametersIVF object from explicit values and return
* its handle through p_sp.
*
* @param p_sp       output location receiving the new handle
* @param sel        ID selector to attach to the parameters
* @param nprobe     value for the nprobe parameter
* @param max_codes  value for the max_codes parameter
* @return status code; NOTE(review): presumably 0 on success and non-zero
* on error -- confirm against the library's error convention
*
* NOTE(review): ownership of sel is not visible from this header -- confirm
* whether the caller must keep it alive and free it.
*/
int faiss_SearchParametersIVF_new_with(
FaissSearchParametersIVF** p_sp,
FaissIDSelector* sel,
size_t nprobe,
size_t max_codes);
/// ID selector attached to the parameters (read-only: getter only)
FAISS_DECLARE_GETTER(SearchParametersIVF, const FaissIDSelector*, sel)
/// nprobe parameter (getter and setter)
FAISS_DECLARE_GETTER_SETTER(SearchParametersIVF, size_t, nprobe)
/// max_codes parameter (getter and setter)
FAISS_DECLARE_GETTER_SETTER(SearchParametersIVF, size_t, max_codes)
/** Index based on an inverted file (IVF)
*
* In the inverted file, the quantizer (an Index instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is then stored.
*
* At search time, the vector to be searched is also quantized, and
* only the list corresponding to the quantization index is
* searched. This speeds up the search by making it
* non-exhaustive. This can be relaxed using multi-probe search: a few
* (nprobe) quantization indices are selected and several inverted
* lists are visited.
*
* Sub-classes implement a post-filtering of the index that refines
* the distance estimation from the query to database vectors.
*
* NOTE(review): the FAISS_DECLARE_* macros are defined outside this file;
* presumably they introduce the FaissIndexIVF handle type used below, its
* destructor, and a downcast helper from FaissIndex -- confirm against the
* macro definitions.
*/
FAISS_DECLARE_CLASS_INHERITED(IndexIVF, Index)
FAISS_DECLARE_DESTRUCTOR(IndexIVF)
FAISS_DECLARE_INDEX_DOWNCAST(IndexIVF)
/// number of possible key values (read-only: getter only)
FAISS_DECLARE_GETTER(IndexIVF, size_t, nlist)
/// number of probes at query time (getter and setter)
FAISS_DECLARE_GETTER_SETTER(IndexIVF, size_t, nprobe)
/// quantizer that maps vectors to inverted lists (read-only: getter only)
FAISS_DECLARE_GETTER(IndexIVF, FaissIndex*, quantizer)
/**
* Training mode of the quantizer (read-only: getter only):
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
FAISS_DECLARE_GETTER(IndexIVF, char, quantizer_trains_alone)
/// whether the object owns the quantizer, exposed as an int flag
/// (getter and setter)
FAISS_DECLARE_GETTER_SETTER(IndexIVF, int, own_fields)
/** Moves the entries from another index to `index`. On output,
* `other` is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal).
*
* @param index   destination index
* @param other   source index; empty on output
* @param add_id  offset added to every moved id
* @return status code; NOTE(review): presumably 0 on success and non-zero
* on error -- confirm against the library's error convention
*/
int faiss_IndexIVF_merge_from(
FaissIndexIVF* index,
FaissIndexIVF* other,
idx_t add_id);
/** Copy a subset of the entries of `index` to the `other` index.
*
* if subset_type == 0: copies ids in [a1, a2)
* if subset_type == 1: copies ids if id % a1 == a2
* if subset_type == 2: copies inverted lists such that a1
* elements are left before and a2 elements are after
*
* @param index        source index (not modified: const)
* @param other        destination index
* @param subset_type  selection rule, one of the values listed above
* @param a1           first selection argument, meaning depends on subset_type
* @param a2           second selection argument, meaning depends on subset_type
* @return status code; NOTE(review): presumably 0 on success and non-zero
* on error -- confirm against the library's error convention
*/
int faiss_IndexIVF_copy_subset_to(
const FaissIndexIVF* index,
FaissIndexIVF* other,
int subset_type,
idx_t a1,
idx_t a2);
/** Search a set of vectors that are pre-quantized by the IVF
* quantizer. Fill in the corresponding heaps with the query
* results. search() calls this.
*
* @param index         index to search (not modified: const)
* @param n             nb of vectors to query
* @param x             query vectors, size n * d
* @param k             nb of results per query (outputs hold n * k entries)
* @param assign        coarse quantization indices, size n * nprobe
* @param centroid_dis  distances to coarse centroids, size n * nprobe
* @param distances     output distances, size n * k
* @param labels        output labels, size n * k
* @param store_pairs   store inv list index + inv list offset in the
* upper/lower 32 bits of the result instead of ids
* (used for reranking)
* @return status code; NOTE(review): presumably 0 on success and non-zero
* on error -- confirm against the library's error convention
*/
int faiss_IndexIVF_search_preassigned(
const FaissIndexIVF* index,
idx_t n,
const float* x,
idx_t k,
const idx_t* assign,
const float* centroid_dis,
float* distances,
idx_t* labels,
int store_pairs);
/// Get the size of one inverted list of the index.
///
/// @param index    index to query (not modified: const)
/// @param list_no  the list ID
/// @return the size of list `list_no`; NOTE(review): presumably counted in
/// stored entries, matching the buffer length that
/// faiss_IndexIVF_invlists_get_ids requires -- confirm
size_t faiss_IndexIVF_get_list_size(const FaissIndexIVF* index, size_t list_no);
/** Initialize a direct map.
*
* @param index                    index to modify
* @param new_maintain_direct_map  boolean flag passed as an int: if true,
* create a direct map, else clear it
* @return status code; NOTE(review): presumably 0 on success and non-zero
* on error -- confirm against the library's error convention
*/
int faiss_IndexIVF_make_direct_map(
FaissIndexIVF* index,
int new_maintain_direct_map);
/** Check the inverted lists' imbalance factor.
*
* @param index  index to inspect (not modified: const)
* @return the imbalance factor: 1 = perfectly balanced, > 1 = imbalanced
*/
double faiss_IndexIVF_imbalance_factor(const FaissIndexIVF* index);
/// Display some stats about the inverted lists of the index.
///
/// @param index  index to inspect (not modified: const)
void faiss_IndexIVF_print_stats(const FaissIndexIVF* index);
/// Get the IDs in an inverted list. IDs are written to `invlist`, which must be
/// large enough to accommodate the full list.
///
/// @param index the index to read from (not modified: const)
/// @param list_no the list ID
/// @param invlist output pointer to a slice of memory, at least as long as the
/// list's size
/// @see faiss_IndexIVF_get_list_size(const FaissIndexIVF*, size_t)
void faiss_IndexIVF_invlists_get_ids(
const FaissIndexIVF* index,
size_t list_no,
idx_t* invlist);
/** Train the encoder of the index.
*
* NOTE(review): this entry point carries no documentation in this header;
* the parameter descriptions below are inferred from the parameter names
* and from faiss_IndexIVF_search_preassigned -- confirm against the
* implementation.
*
* @param index   index whose encoder is trained
* @param n       presumably the nb of training vectors
* @param x       presumably the training vectors, size n * d
* @param assign  presumably the coarse quantization index of each vector
* @return status code; NOTE(review): presumably 0 on success and non-zero
* on error -- confirm against the library's error convention
*/
int faiss_IndexIVF_train_encoder(
FaissIndexIVF* index,
idx_t n,
const float* x,
const idx_t* assign);
/** Counters and timings describing IVF search activity.
*
* A global instance is reachable through faiss_get_indexIVF_stats(), and
* faiss_IndexIVFStats_reset() resets an instance.
*/
typedef struct FaissIndexIVFStats {
size_t nq; // nb of queries run
size_t nlist; // nb of inverted lists scanned
size_t ndis; // nb of distances computed
size_t nheap_updates; // nb of times the heap was updated
double quantization_time; // time spent quantizing vectors (in ms)
double search_time; // time spent searching lists (in ms)
} FaissIndexIVFStats;
/// Reset the statistics held in `stats`.
/// NOTE(review): presumably sets every counter and timing to zero -- confirm
/// against the implementation.
///
/// @param stats  statistics object to reset
void faiss_IndexIVFStats_reset(FaissIndexIVFStats* stats);
/** Initialize a statistics object by delegating to
* faiss_IndexIVFStats_reset().
*
* Declared `static inline` so that every translation unit including this
* header gets its own usable definition. In C99 and later, a plain `inline`
* definition in a header does not provide an external definition, so a call
* that the compiler chooses not to inline (e.g. in an unoptimized build)
* would fail to link from C code.
*
* @param stats  statistics object to initialize
*/
static inline void faiss_IndexIVFStats_init(FaissIndexIVFStats* stats) {
    faiss_IndexIVFStats_reset(stats);
}
/** Access the global variable that collects all statistics.
*
* The parameter list is spelled `(void)` so that the declaration is a real
* prototype in C: with empty parentheses, C before C23 leaves the parameters
* unspecified and does not check call arguments. In C++ both spellings are
* equivalent.
*
* @return pointer to the global statistics object
*/
FaissIndexIVFStats* faiss_get_indexIVF_stats(void);
#ifdef __cplusplus
}
#endif
#endif
|