// File: omenquireinternal.h
//
// Package info:
//   xapian-core 1.4.3-2+deb9u3
//   links: PTS, VCS
//   area: main
//   in suites: stretch
//   size: 21,412 kB
//   sloc: cpp: 113,868; ansic: 8,723; sh: 4,433; perl: 836; makefile: 566; tcl: 317; python: 40
// File content: 334 lines | stat: -rw-r--r-- 9,331 bytes
/** @file omenquireinternal.h
 * @brief Internals
 */
/* Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2001,2002 Ananova Ltd
 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
 * Copyright 2009 Lemur Consulting Ltd
 * Copyright 2011 Action Without Borders
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */

#ifndef OM_HGUARD_OMENQUIREINTERNAL_H
#define OM_HGUARD_OMENQUIREINTERNAL_H

#include "xapian/database.h"
#include "xapian/document.h"
#include "xapian/enquire.h"
#include "xapian/query.h"
#include "xapian/keymaker.h"

#include <algorithm>
#include <cmath>
#include <map>
#include <set>
#include <unordered_map>

#include "weight/weightinternal.h"

using namespace std;

class OmExpand;
class MultiMatch;

namespace Xapian {

class TermIterator;

namespace Internal {

/** An item resulting from a query.
 *
 *  This item contains the document id and the weight calculated for
 *  the document, plus the collapse and sort information which is carried
 *  along with them.
 */
class MSetItem {
    public:
	/** Construct an item with no collapse key.
	 *
	 *  collapse_key and sort_key are left empty and collapse_count is 0.
	 *
	 *  @param wt_	Weight calculated for the document.
	 *  @param did_	Document id.
	 */
	MSetItem(double wt_, Xapian::docid did_)
		: wt(wt_), did(did_), collapse_count(0) {}

	/** Construct an item with a collapse key.
	 *
	 *  sort_key is left empty and collapse_count is 0.
	 *
	 *  @param wt_	Weight calculated for the document.
	 *  @param did_	Document id.
	 *  @param key_	Collapse key value for this item.
	 */
	MSetItem(double wt_, Xapian::docid did_, const string &key_)
		: wt(wt_), did(did_), collapse_key(key_), collapse_count(0) {}

	/** Construct an item with a collapse key and an explicit collapse
	 *  count.
	 *
	 *  sort_key is left empty.
	 *
	 *  @param wt_			Weight calculated for the document.
	 *  @param did_			Document id.
	 *  @param key_			Collapse key value for this item.
	 *  @param collapse_count_	Initial value for collapse_count.
	 */
	MSetItem(double wt_, Xapian::docid did_, const string &key_,
		 Xapian::doccount collapse_count_)
		: wt(wt_), did(did_), collapse_key(key_),
		  collapse_count(collapse_count_) {}

	/** Exchange the contents of this item with those of @a o.
	 *
	 *  Every member is swapped, including sort_key.  Swapping a double,
	 *  two integers and two std::string objects cannot throw, so this is
	 *  declared noexcept, which lets callers and standard algorithms rely
	 *  on that guarantee.
	 */
	void swap(MSetItem & o) noexcept {
	    std::swap(wt, o.wt);
	    std::swap(did, o.did);
	    std::swap(collapse_key, o.collapse_key);
	    std::swap(collapse_count, o.collapse_count);
	    std::swap(sort_key, o.sort_key);
	}

	/** Weight calculated. */
	double wt;

	/** Document id. */
	Xapian::docid did;

	/** Value which was used to collapse upon.
	 *
	 *  If the collapse option is not being used, this will always
	 *  be an empty string.
	 *
	 *  If the collapse option is in use, this will contain the collapse
	 *  key's value for this particular item.  If the key is not present
	 *  for this item, the value will be an empty string.  Only one
	 *  instance of each key value (apart from the empty string) will be
	 *  present in the items in the returned Xapian::MSet.
	 */
	string collapse_key;

	/** Count of collapses done on collapse_key so far.
	 *
	 *  This is normally 0, and goes up for each collapse done.
	 *  It is not necessarily an indication of how many collapses
	 *  might be done if an exhaustive match was done.
	 */
	Xapian::doccount collapse_count;

	/** Used when sorting by value.
	 *
	 *  None of the constructors set this, so it starts out empty.
	 */
	string sort_key;

	/// Return a string describing this object.
	string get_description() const;
};

}

/** Internals of enquire system.
 *
 *  This allows the implementation of Xapian::Enquire to be hidden and
 *  reference counted (via the Xapian::Internal::intrusive_base base class).
 */
class Enquire::Internal : public Xapian::Internal::intrusive_base {
    friend class MSet::Internal;
    private:
	/// The database which this enquire object uses.
	const Xapian::Database db;

	/// The user's query.
	Query query;

	/// The query length.
	termcount qlen;

	/// Copying is not allowed (rejected at compile time).
	Internal(const Internal &) = delete;
	/// Assignment is not allowed (rejected at compile time).
	void operator=(const Internal &) = delete;

    public:
	/** Possible values for sort_by.
	 *
	 *  NOTE(review): presumably REL = by relevance, VAL = by value,
	 *  VAL_REL = by value then relevance, REL_VAL = by relevance then
	 *  value - confirm against the matcher.
	 */
	typedef enum { REL, VAL, VAL_REL, REL_VAL } sort_setting;

	/** Value slot used for collapsing.
	 *
	 *  NOTE(review): presumably the slot whose value ends up in
	 *  MSetItem::collapse_key - confirm against the matcher.
	 */
	Xapian::valueno collapse_key;

	/** Limit associated with collapsing.
	 *
	 *  NOTE(review): presumably the maximum number of items kept per
	 *  collapse key value - confirm.
	 */
	Xapian::doccount collapse_max;

	/// Document id ordering setting (a Xapian::Enquire::docid_order value).
	Xapian::Enquire::docid_order order;

	/** Percentage cutoff setting.
	 *
	 *  NOTE(review): presumably a percentage score below which matches
	 *  are discarded - confirm.
	 */
	int percent_cutoff;

	/** Weight cutoff setting.
	 *
	 *  NOTE(review): presumably a weight below which matches are
	 *  discarded - confirm.
	 */
	double weight_cutoff;

	/** Value slot associated with sorting.
	 *
	 *  NOTE(review): presumably only meaningful when sort_by != REL and
	 *  no sorter is set - confirm.
	 */
	Xapian::valueno sort_key;

	/// Which sort_setting is in effect.
	sort_setting sort_by;

	/** Direction flag for sorting by value.
	 *
	 *  NOTE(review): which direction "forward" denotes is not visible
	 *  in this header - confirm.
	 */
	bool sort_value_forward;

	/// Optional KeyMaker object (held via opt_intrusive_ptr).
	Xapian::Internal::opt_intrusive_ptr<KeyMaker> sorter;

	/** Time limit setting.
	 *
	 *  NOTE(review): units and the meaning of zero are not visible in
	 *  this header - confirm.
	 */
	double time_limit;

	/** The weight to use for this query.
	 *
	 *  This is mutable so that the default BM25Weight object can be
	 *  created lazily when first required.
	 *
	 *  NOTE(review): raw pointer; presumably owned by this object and
	 *  released in ~Internal() - confirm in the implementation file.
	 */
	mutable Weight * weight;

	/// The weighting scheme to use for query expansion.
	std::string eweightname;

	/// The parameter required for TradWeight query expansion.
	double expand_k;

	/// MatchSpy objects attached to this enquire object.
	vector<Xapian::Internal::opt_intrusive_ptr<MatchSpy>> spies;

	/** Construct the internals for the given database.
	 *
	 *  Explicit, so a Xapian::Database is never implicitly converted.
	 */
	explicit Internal(const Xapian::Database &databases);
	~Internal();

	/** Request a document from the database.
	 */
	void request_doc(const Xapian::Internal::MSetItem &item) const;

	/** Read a previously requested document from the database.
	 */
	Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const;

	/** Return the document for @a item.
	 *
	 *  The implementation is not visible in this header.
	 */
	Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const;

	/// Set the query and its length.
	void set_query(const Query & query_, termcount qlen_);

	/// Return the current query.
	const Query & get_query() const;

	/** Run the match and return the results as an MSet.
	 *
	 *  @param first		Index of the first item wanted.
	 *  @param maxitems		Maximum number of items to return.
	 *  @param check_at_least	Minimum number of items to check.
	 *  @param omrset		Relevance set (passed by pointer;
	 *				NOTE(review): presumably may be NULL -
	 *				confirm).
	 *  @param mdecider		Match decider (passed by pointer;
	 *				NOTE(review): presumably may be NULL -
	 *				confirm).
	 */
	MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
		      Xapian::doccount check_at_least,
		      const RSet *omrset,
		      const MatchDecider *mdecider) const;

	/** Return an ESet of expansion terms.
	 *
	 *  @param maxitems	Maximum number of items to return.
	 *  @param omrset	Relevance set to expand from.
	 *  @param flags	Flags controlling expansion.
	 *  @param edecider	Expand decider (passed by pointer;
	 *			NOTE(review): presumably may be NULL - confirm).
	 *  @param min_wt	Minimum weight (NOTE(review): presumably terms
	 *			weighted below this are dropped - confirm).
	 */
	ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags,
		      const ExpandDecider *edecider, double min_wt) const;

	/// Return the matching terms for the document with id @a did.
	TermIterator get_matching_terms(Xapian::docid did) const;

	/// Return the matching terms for the item @a it refers to.
	TermIterator get_matching_terms(const Xapian::MSetIterator &it) const;

	/// Return the term frequency of @a tname.
	Xapian::doccount get_termfreq(const string &tname) const;

	/// Return a string describing this object.
	string get_description() const;
};

/** Internals of Xapian::MSet.
 *
 *  Holds the items making up the match set together with the associated
 *  statistics and a cache of fetched documents.  Reference counted via
 *  the Xapian::Internal::intrusive_base base class.
 */
class MSet::Internal : public Xapian::Internal::intrusive_base {
    public:
	/// Factor to multiply weights by to convert them to percentages.
	double percent_factor;

    private:
	/** The set of documents which have been requested but not yet
	 *  collected.
	 */
	mutable set<Xapian::doccount> requested_docs;

	/// Cache of documents, indexed by MSet index.
	mutable map<Xapian::doccount, Xapian::Document> indexeddocs;

	/// Read and cache the documents so far requested.
	void read_docs() const;

	/// Copying is not allowed (rejected at compile time).
	Internal(const Internal &) = delete;
	/// Assignment is not allowed (rejected at compile time).
	void operator=(const Internal &) = delete;

	/** Cache keyed by string, used by snippet().
	 *
	 *  NOTE(review): presumably maps a term to its background relevance -
	 *  confirm in the snippet implementation.
	 */
	mutable std::unordered_map<std::string, double> snippet_bg_relevance;

    public:
	/// Xapian::Enquire reference, for getting documents.
	Xapian::Internal::intrusive_ptr<const Enquire::Internal> enquire;

	/** Provides the term frequency and weight for each term in the query.
	 *
	 *  Owned by this object: it is deleted in the destructor.  Both
	 *  constructors initialise it to nullptr.
	 */
	Xapian::Weight::Internal * stats;

	/// A list of items comprising the (selected part of the) MSet.
	vector<Xapian::Internal::MSetItem> items;

	/// Rank of first item in MSet.
	Xapian::doccount firstitem;

	/// Lower bound on the number of matching documents.
	Xapian::doccount matches_lower_bound;

	/// Estimate of the number of matching documents.
	Xapian::doccount matches_estimated;

	/// Upper bound on the number of matching documents.
	Xapian::doccount matches_upper_bound;

	/** Lower bound counterpart of matches_lower_bound for the uncollapsed
	 *  case.
	 *
	 *  NOTE(review): presumably the count if collapsing were not applied -
	 *  confirm.
	 */
	Xapian::doccount uncollapsed_lower_bound;

	/// Estimate counterpart of matches_estimated for the uncollapsed case.
	Xapian::doccount uncollapsed_estimated;

	/** Upper bound counterpart of matches_upper_bound for the uncollapsed
	 *  case.
	 */
	Xapian::doccount uncollapsed_upper_bound;

	/** Maximum possible weight.
	 *
	 *  NOTE(review): presumably the greatest weight any document could
	 *  attain for the query - confirm.
	 */
	double max_possible;

	/** Maximum attained weight.
	 *
	 *  NOTE(review): presumably the greatest weight actually attained by
	 *  a matching document - confirm.
	 */
	double max_attained;

	/// Construct an empty MSet: all counts and weights are zero.
	Internal()
		: percent_factor(0),
		  stats(nullptr),
		  firstitem(0),
		  matches_lower_bound(0),
		  matches_estimated(0),
		  matches_upper_bound(0),
		  uncollapsed_lower_bound(0),
		  uncollapsed_estimated(0),
		  uncollapsed_upper_bound(0),
		  max_possible(0),
		  max_attained(0) {}

	/** Construct from the results of a match.
	 *
	 *  Note: destroys parameter items - the contents of @a items_ are
	 *  swapped into this object, leaving @a items_ empty.
	 *
	 *  Beware that the bounds are passed in the order (upper, lower,
	 *  estimated), which differs from the order in which the
	 *  corresponding members are declared.
	 */
	Internal(Xapian::doccount firstitem_,
	     Xapian::doccount matches_upper_bound_,
	     Xapian::doccount matches_lower_bound_,
	     Xapian::doccount matches_estimated_,
	     Xapian::doccount uncollapsed_upper_bound_,
	     Xapian::doccount uncollapsed_lower_bound_,
	     Xapian::doccount uncollapsed_estimated_,
	     double max_possible_,
	     double max_attained_,
	     vector<Xapian::Internal::MSetItem> &items_,
	     double percent_factor_)
		: percent_factor(percent_factor_),
		  stats(nullptr),
		  firstitem(firstitem_),
		  matches_lower_bound(matches_lower_bound_),
		  matches_estimated(matches_estimated_),
		  matches_upper_bound(matches_upper_bound_),
		  uncollapsed_lower_bound(uncollapsed_lower_bound_),
		  uncollapsed_estimated(uncollapsed_estimated_),
		  uncollapsed_upper_bound(uncollapsed_upper_bound_),
		  max_possible(max_possible_),
		  max_attained(max_attained_) {
	    // items was default-constructed (empty), so swapping takes over
	    // the caller's vector without copying any elements.
	    std::swap(items, items_);
	}

	/// Releases stats (deleting a null pointer is a no-op).
	~Internal() { delete stats; }

	/// Get a document by index in MSet, via the cache.
	Xapian::Document get_doc_by_index(Xapian::doccount index) const;

	/// Converts a weight to a percentage weight.
	int convert_to_percent_internal(double wt) const;

	/** Generate a snippet from @a text.
	 *
	 *  NOTE(review): parameter semantics are not visible in this header;
	 *  presumably they mirror the public Xapian::MSet::snippet() API -
	 *  confirm.
	 */
	std::string snippet(const std::string & text, size_t length,
			    const Xapian::Stem & stemmer,
			    unsigned flags,
			    const std::string & hi_start,
			    const std::string & hi_end,
			    const std::string & omit) const;

	/// Return a string describing this object.
	string get_description() const;

	/** Fetch items specified into the document cache.
	 *
	 *  NOTE(review): whether @a last is inclusive is not visible in this
	 *  header - confirm in the implementation.
	 */
	void fetch_items(Xapian::doccount first, Xapian::doccount last) const;
};

/** Internals of Xapian::RSet.
 *
 *  Stores the document ids making up a relevance set.  Xapian::RSet is a
 *  friend so it can modify the set directly; other code gets read-only
 *  access through get_items().
 */
class RSet::Internal : public Xapian::Internal::intrusive_base {
    friend class Xapian::RSet;

    private:
	/// Document ids of the items in the relevance set.
	set<Xapian::docid> items;

    public:
	/// Read-only access to the document ids in the relevance set.
	const set<Xapian::docid> & get_items() const {
	    return items;
	}

	/// Return a string describing this object.
	string get_description() const;
};

}

#endif // OM_HGUARD_OMENQUIREINTERNAL_H