1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
|
/** @file omenquireinternal.h
 * @brief Internals of Xapian::Enquire, Xapian::MSet and Xapian::RSet
 */
/* Copyright 1999,2000,2001 BrightStation PLC
* Copyright 2001,2002 Ananova Ltd
* Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
* Copyright 2009 Lemur Consulting Ltd
* Copyright 2011 Action Without Borders
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
#ifndef OM_HGUARD_OMENQUIREINTERNAL_H
#define OM_HGUARD_OMENQUIREINTERNAL_H
#include "xapian/database.h"
#include "xapian/document.h"
#include "xapian/enquire.h"
#include "xapian/query.h"
#include "xapian/keymaker.h"
#include <algorithm>
#include <cmath>
#include <map>
#include <set>
#include <unordered_map>
#include "weight/weightinternal.h"
using namespace std;
class OmExpand;
class MultiMatch;
namespace Xapian {
class TermIterator;
namespace Internal {
/** An item resulting from a query.
* This item contains the document id, and the weight calculated for
* the document.
*/
class MSetItem {
public:
MSetItem(double wt_, Xapian::docid did_)
: wt(wt_), did(did_), collapse_count(0) {}
MSetItem(double wt_, Xapian::docid did_, const string &key_)
: wt(wt_), did(did_), collapse_key(key_), collapse_count(0) {}
MSetItem(double wt_, Xapian::docid did_, const string &key_,
Xapian::doccount collapse_count_)
: wt(wt_), did(did_), collapse_key(key_),
collapse_count(collapse_count_) {}
void swap(MSetItem & o) {
std::swap(wt, o.wt);
std::swap(did, o.did);
std::swap(collapse_key, o.collapse_key);
std::swap(collapse_count, o.collapse_count);
std::swap(sort_key, o.sort_key);
}
/** Weight calculated. */
double wt;
/** Document id. */
Xapian::docid did;
/** Value which was used to collapse upon.
*
* If the collapse option is not being used, this will always
* have a null value.
*
* If the collapse option is in use, this will contain the collapse
* key's value for this particular item. If the key is not present
* for this item, the value will be a null string. Only one instance
* of each key value (apart from the null string) will be present in
* the items in the returned Xapian::MSet.
*/
string collapse_key;
/** Count of collapses done on collapse_key so far
*
* This is normally 0, and goes up for each collapse done
* It is not necessarily an indication of how many collapses
* might be done if an exhaustive match was done
*/
Xapian::doccount collapse_count;
/** Used when sorting by value. */
string sort_key;
/// Return a string describing this object.
string get_description() const;
};
}
/** Internals of enquire system.
 *
 *  This allows the implementation of Xapian::Enquire to be hidden and
 *  reference counted.
 *
 *  Objects of this class are not copyable.  All non-inline members are
 *  defined outside this header.
 */
class Enquire::Internal : public Xapian::Internal::intrusive_base {
    friend class MSet::Internal;

  private:
    /// The database which this enquire object uses.
    const Xapian::Database db;

    /// The user's query.
    Query query;

    /// The query length.
    termcount qlen;

    /** Copy not allowed.
     *
     *  Explicitly deleted so that an accidental copy - including one made
     *  by this class or by the friend MSet::Internal - is rejected at
     *  compile time rather than surfacing as a link error.
     */
    Internal(const Internal &) = delete;

    /// Assignment not allowed (deleted for the same reason as copying).
    Internal & operator=(const Internal &) = delete;

  public:
    /** Sort order selector stored in sort_by.
     *
     *  NOTE(review): the enumerator names suggest relevance, value,
     *  value-then-relevance and relevance-then-value ordering - confirm
     *  against the matcher.
     */
    typedef enum { REL, VAL, VAL_REL, REL_VAL } sort_setting;

    // The settings below are public data members.
    // NOTE(review): presumably written by the Xapian::Enquire setters and
    // read by the matcher; their exact semantics are not visible in this
    // header.

    /// Value slot number associated with collapsing.
    Xapian::valueno collapse_key;

    /// Document count associated with collapsing.
    Xapian::doccount collapse_max;

    /// Docid ordering setting.
    Xapian::Enquire::docid_order order;

    /// Percentage cutoff setting.
    int percent_cutoff;

    /// Weight cutoff setting.
    double weight_cutoff;

    /// Value slot number associated with sorting.
    Xapian::valueno sort_key;

    /// Which sort_setting is in effect.
    sort_setting sort_by;

    /// Direction flag for sorting by value.
    bool sort_value_forward;

    /// Optional KeyMaker (the pointer may be unset).
    Xapian::Internal::opt_intrusive_ptr<KeyMaker> sorter;

    /// Time limit setting.  NOTE(review): units are not visible here.
    double time_limit;

    /** The weight to use for this query.
     *
     *  This is mutable so that the default BM25Weight object can be
     *  created lazily when first required.
     */
    mutable Weight * weight;

    /// The weighting scheme to use for query expansion.
    std::string eweightname;

    /// The parameter required for TradWeight query expansion.
    double expand_k;

    /// MatchSpy objects attached to this enquire object.
    std::vector<Xapian::Internal::opt_intrusive_ptr<MatchSpy>> spies;

    /// Construct from the database to use.
    explicit Internal(const Xapian::Database &databases);

    ~Internal();

    /** Request a document from the database.
     */
    void request_doc(const Xapian::Internal::MSetItem &item) const;

    /** Read a previously requested document from the database.
     */
    Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const;

    /// Get the document for the given MSet item.
    Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const;

    /// Set the query and its length.
    void set_query(const Query & query_, termcount qlen_);

    /// Get the query, returned by const reference.
    const Query & get_query() const;

    /** Get an MSet.
     *
     *  @a omrset and @a mdecider are passed as pointers.
     *  NOTE(review): presumably either may be NULL to mean "none" -
     *  confirm against the implementation.
     */
    MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
		  Xapian::doccount check_at_least,
		  const RSet *omrset,
		  const MatchDecider *mdecider) const;

    /** Get an ESet.
     *
     *  @a edecider is passed as a pointer.
     *  NOTE(review): presumably it may be NULL to mean "none" - confirm
     *  against the implementation.
     */
    ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags,
		  const ExpandDecider *edecider, double min_wt) const;

    /// Get an iterator over matching terms for a document id.
    TermIterator get_matching_terms(Xapian::docid did) const;

    /// Get an iterator over matching terms for an MSet iterator position.
    TermIterator get_matching_terms(const Xapian::MSetIterator &it) const;

    /// Get the term frequency of @a tname.
    Xapian::doccount get_termfreq(const std::string &tname) const;

    /// Return a string describing this object.
    std::string get_description() const;
};
/** Internals of Xapian::MSet.
 *
 *  Holds the items of the match set, the match-count bounds and weight
 *  maxima, and a cache of documents fetched for those items.
 *
 *  Objects of this class are not copyable.
 */
class MSet::Internal : public Xapian::Internal::intrusive_base {
  public:
    /// Factor to multiply weights by to convert them to percentages.
    double percent_factor;

  private:
    /** The set of documents which have been requested but not yet
     *  collected.
     */
    mutable std::set<Xapian::doccount> requested_docs;

    /// Cache of documents, indexed by MSet index.
    mutable std::map<Xapian::doccount, Xapian::Document> indexeddocs;

    /// Read and cache the documents so far requested.
    void read_docs() const;

    /** Copy not allowed.
     *
     *  Explicitly deleted so that an accidental copy is rejected at
     *  compile time rather than surfacing as a link error.
     */
    Internal(const Internal &) = delete;

    /// Assignment not allowed (deleted for the same reason as copying).
    Internal & operator=(const Internal &) = delete;

    /** Per-string relevance values, mutable so const members can update it.
     *
     *  NOTE(review): the name suggests a background-relevance cache used
     *  by snippet() - confirm against the implementation.
     */
    mutable std::unordered_map<std::string, double> snippet_bg_relevance;

  public:
    /// Xapian::Enquire reference, for getting documents.
    Xapian::Internal::intrusive_ptr<const Enquire::Internal> enquire;

    /** Provides the term frequency and weight for each term in the query.
     *
     *  Initialised to a null pointer by both constructors; whatever it
     *  points to when this object is destroyed is deleted by the
     *  destructor.
     */
    Xapian::Weight::Internal * stats;

    /// A list of items comprising the (selected part of the) MSet.
    std::vector<Xapian::Internal::MSetItem> items;

    /// Rank of first item in MSet.
    Xapian::doccount firstitem;

    // Lower bound, estimate and upper bound for the number of matches.
    // NOTE(review): presumably the uncollapsed_* trio gives the same
    // figures before collapsing is applied - confirm.
    Xapian::doccount matches_lower_bound;

    Xapian::doccount matches_estimated;

    Xapian::doccount matches_upper_bound;

    Xapian::doccount uncollapsed_lower_bound;

    Xapian::doccount uncollapsed_estimated;

    Xapian::doccount uncollapsed_upper_bound;

    /// NOTE(review): presumably the maximum weight a document could get.
    double max_possible;

    /// NOTE(review): presumably the maximum weight any document attained.
    double max_attained;

    /// Construct an empty MSet: no items, all counts and weights zero.
    Internal()
	: percent_factor(0),
	  stats(nullptr),
	  firstitem(0),
	  matches_lower_bound(0),
	  matches_estimated(0),
	  matches_upper_bound(0),
	  uncollapsed_lower_bound(0),
	  uncollapsed_estimated(0),
	  uncollapsed_upper_bound(0),
	  max_possible(0),
	  max_attained(0) {}

    /** Construct from the given statistics and items.
     *
     *  Note: destroys parameter items.  The contents of @a items_ are
     *  swapped into this object, so on return @a items_ is empty.
     *
     *  Note also that the upper bound precedes the lower bound and the
     *  estimate in the parameter list, which differs from the order in
     *  which the members are declared.
     */
    Internal(Xapian::doccount firstitem_,
	     Xapian::doccount matches_upper_bound_,
	     Xapian::doccount matches_lower_bound_,
	     Xapian::doccount matches_estimated_,
	     Xapian::doccount uncollapsed_upper_bound_,
	     Xapian::doccount uncollapsed_lower_bound_,
	     Xapian::doccount uncollapsed_estimated_,
	     double max_possible_,
	     double max_attained_,
	     std::vector<Xapian::Internal::MSetItem> &items_,
	     double percent_factor_)
	: percent_factor(percent_factor_),
	  stats(nullptr),
	  firstitem(firstitem_),
	  matches_lower_bound(matches_lower_bound_),
	  matches_estimated(matches_estimated_),
	  matches_upper_bound(matches_upper_bound_),
	  uncollapsed_lower_bound(uncollapsed_lower_bound_),
	  uncollapsed_estimated(uncollapsed_estimated_),
	  uncollapsed_upper_bound(uncollapsed_upper_bound_),
	  max_possible(max_possible_),
	  max_attained(max_attained_) {
	// items was default-constructed (empty), so this moves the caller's
	// items in without copying and leaves items_ empty.
	items.swap(items_);
    }

    /// Deletes stats (deleting a null pointer is a no-op).
    ~Internal() { delete stats; }

    /// get a document by index in MSet, via the cache.
    Xapian::Document get_doc_by_index(Xapian::doccount index) const;

    /// Converts a weight to a percentage weight
    int convert_to_percent_internal(double wt) const;

    /** Generate a snippet from @a text.
     *
     *  NOTE(review): parameter semantics are not visible in this header;
     *  presumably they mirror Xapian::MSet::snippet() - confirm.
     */
    std::string snippet(const std::string & text, size_t length,
			const Xapian::Stem & stemmer,
			unsigned flags,
			const std::string & hi_start,
			const std::string & hi_end,
			const std::string & omit) const;

    /// Return a string describing this object.
    std::string get_description() const;

    /** Fetch items specified into the document cache.
     */
    void fetch_items(Xapian::doccount first, Xapian::doccount last) const;
};
/// Internals of Xapian::RSet: holds the document ids in the relevance set.
class RSet::Internal : public Xapian::Internal::intrusive_base {
    // Xapian::RSet needs direct access to the private item set below.
    friend class Xapian::RSet;

    /// Document ids making up the relevance set.
    std::set<Xapian::docid> items;

  public:
    /// Read-only access to the document ids in the relevance set.
    const std::set<Xapian::docid> & get_items() const { return items; }

    /// Return a string describing this object.
    std::string get_description() const;
};
}
#endif // OM_HGUARD_OMENQUIREINTERNAL_H
|