File: postlist.h

package info (click to toggle)
xapian-core 1.4.3-2%2Bdeb9u3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 21,412 kB
  • sloc: cpp: 113,868; ansic: 8,723; sh: 4,433; perl: 836; makefile: 566; tcl: 317; python: 40
file content (215 lines) | stat: -rw-r--r-- 8,324 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
/** @file postlist.h
 * @brief Abstract base class for postlists.
 */
/* Copyright (C) 2007,2008,2009,2011,2015,2017 Olly Betts
 * Copyright (C) 2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */

#ifndef XAPIAN_INCLUDED_POSTLIST_H
#define XAPIAN_INCLUDED_POSTLIST_H

#include <string>

#include "xapian/intrusive_ptr.h"
#include <xapian/types.h>
#include <xapian/postingiterator.h>

#include "backends/positionlist.h"
#include "weight/weightinternal.h"

class OrPositionList;

/** Abstract base class for postlists.
 *
 *  A postlist is iterated with next() / skip_to() / check(); the accessors
 *  such as get_docid() and get_weight() report on the entry at the current
 *  position.  Most methods are pure virtual and must be supplied by
 *  subclasses; the non-pure virtual methods are only declared in this header,
 *  so their default implementations live elsewhere.
 *
 *  The class derives from Xapian::Internal::intrusive_base and cannot be
 *  copied or assigned (the copy constructor and assignment operator are
 *  private).
 *
 *  NOTE(review): presumably intrusive_base supplies the reference count used
 *  by Xapian's intrusive_ptr - confirm in xapian/intrusive_ptr.h.
 */
class Xapian::PostingIterator::Internal : public Xapian::Internal::intrusive_base {
    /// Don't allow assignment (declared private; no definition in this header).
    void operator=(const Internal &);

    /// Don't allow copying (declared private; no definition in this header).
    Internal(const Internal &);

  protected:
    /// Only constructable as a base class for derived classes.
    Internal() { }

  public:
    /** We have virtual methods and want to be able to delete derived classes
     *  using a pointer to the base class, so we need a virtual destructor.
     */
    virtual ~Internal();

    /// Get a lower bound on the number of documents indexed by this term.
    virtual Xapian::doccount get_termfreq_min() const = 0;

    /// Get an upper bound on the number of documents indexed by this term.
    virtual Xapian::doccount get_termfreq_max() const = 0;

    /** Get an estimate of the number of documents indexed by this term.
     *
     *  It should always be true that:
     *  get_termfreq_min() <= get_termfreq_est() <= get_termfreq_max()
     */
    virtual Xapian::doccount get_termfreq_est() const = 0;

    /** Get an estimate for the termfreq and reltermfreq, given the stats.
     *
     *  The frequencies may be for a combination of databases, or for just the
     *  relevant documents, so the results need not lie in the bounds given by
     *  get_termfreq_min() and get_termfreq_max().
     *
     *  This method is not pure virtual, so a default implementation exists
     *  outside this header.
     *
     *  @param stats	The statistics to base the estimate on.
     *
     *  @return	A TermFreqs object holding the estimates.
     */
    virtual TermFreqs get_termfreq_est_using_stats(
	const Xapian::Weight::Internal & stats) const;

    /// Return an upper bound on what get_weight() can return.
    virtual double get_maxweight() const = 0;

    /// Return the current docid.
    virtual Xapian::docid get_docid() const = 0;

    /// Return the length of current document.
    virtual Xapian::termcount get_doclength() const = 0;
    /* FIXME: Once flint has been retired, we should probably strip out
     * PostList::get_doclength() and just fetch it from the DB directly.
     */

    /// Return the number of unique terms in the current document.
    virtual Xapian::termcount get_unique_terms() const = 0;

    /** Return the wdf for the document at the current position.
     *
     *  The default implementation throws Xapian::UnimplementedError.
     */
    virtual Xapian::termcount get_wdf() const;

    /// Return the weight contribution for the current position.
    virtual double get_weight() const = 0;

    /** Return a pointer to the sort key for the current position.
     *
     *  This method is not pure virtual, so subclasses which don't override it
     *  get a default implementation defined outside this header.
     *
     *  NOTE(review): presumably this mirrors get_collapse_key() - i.e. it
     *  returns the sort key only if it is already known, and the default
     *  implementation returns NULL - confirm against the implementation.
     */
    virtual const std::string * get_sort_key() const;

    /** If the collapse key is already known, return it.
     *
     *  This is implemented by MSetPostList (and MergePostList).  Other
     *  subclasses rely on the default implementation which just returns
     *  NULL.
     */
    virtual const std::string * get_collapse_key() const;

    /// Return true if the current position is past the last entry in this list.
    virtual bool at_end() const = 0;

    /** Recalculate the upper bound on what get_weight() can return.
     *
     *  If the tree has pruned, get_maxweight() may use cached values.  Calling
     *  this method instead forces a full recalculation.
     *
     *  Note that this method may be called after the postlist has reached the
     *  end.  In this situation, the method should return 0.
     */
    virtual double recalc_maxweight() = 0;

    /** Read the position list for the term in the current document and
     *  return a pointer to it (owned by the PostList).
     *
     *  Because the PostList owns the returned object, the caller must not
     *  delete it.
     *
     *  The default implementation throws Xapian::UnimplementedError.
     */
    virtual PositionList * read_position_list();

    /** Read the position list for the term in the current document and
     *  return a pointer to it (not owned by the PostList).
     *
     *  Unlike read_position_list(), the PostList does not own the returned
     *  object.
     *
     *  The default implementation throws Xapian::UnimplementedError.
     */
    virtual PositionList * open_position_list() const;

    /** Advance the current position to the next document in the postlist.
     *
     *  The list starts before the first entry in the list, so next()
     *  must be called before any methods which need the context of
     *  the current position.
     *
     *  @param w_min	The minimum weight contribution that is needed (this is
     *			just a hint which PostList subclasses may ignore).
     *
     *  @return	If a non-NULL pointer is returned, then the caller should
     *		substitute the returned pointer for its pointer to us, and then
     *		delete us.  This "pruning" can only happen for a non-leaf
     *		subclass of this class.
     */
    virtual Internal * next(double w_min) = 0;

    /** Skip forward to the specified docid.
     *
     *  If the specified docid isn't in the list, position ourselves on the
     *  first document after it (or at_end() if no greater docids are present).
     *
     *  The first parameter (unnamed in this declaration) is the docid to
     *  skip to.
     *
     *  @param w_min	The minimum weight contribution that is needed (this is
     *			just a hint which PostList subclasses may ignore).
     *
     *  @return	If a non-NULL pointer is returned, then the caller should
     *		substitute the returned pointer for its pointer to us, and then
     *		delete us.  This "pruning" can only happen for a non-leaf
     *		subclass of this class.
     */
    virtual Internal * skip_to(Xapian::docid, double w_min) = 0;

    /** Check if the specified docid occurs in this postlist.
     *
     *  The caller is required to ensure that the specified @a docid actually
     *  exists in the database.
     *
     *  This method acts like skip_to() if that can be done at little extra
     *  cost, in which case it then sets @a valid to true.
     *
     *  Otherwise it simply checks if a particular docid is present.  If it
     *  is, @a valid is set to true.  If it isn't, it sets @a valid to
     *  false, and leaves the position unspecified (and hence the result of
     *  calling methods which depends on the current position, such as
     *  get_docid(), are also unspecified).  In this state, next() will
     *  advance to the first matching position after @a docid, and skip_to()
     *  will act as it would if the position was the first matching position
     *  after @a docid.
     *
     *  The default implementation calls skip_to().
     *
     *  @param did	The docid to check for (referred to as @a docid above).
     *  @param w_min	The minimum weight contribution that is needed.
     *  @param valid	Output parameter, set as described above.
     *
     *  @return	NOTE(review): presumably follows the same "pruning"
     *		convention as the return value of skip_to() - confirm.
     */
    virtual Internal * check(Xapian::docid did, double w_min, bool &valid);

    /** Advance the current position to the next document in the postlist.
     *
     *  Any weight contribution is acceptable.
     *
     *  This is a non-virtual convenience overload which forwards to
     *  next(double) with a w_min of 0.0.  A derived class which declares
     *  next(double) hides this overload for calls made through the derived
     *  type, so call it via a pointer or reference to this base class.
     */
    Internal * next() { return next(0.0); }

    /** Skip forward to the specified docid.
     *
     *  Any weight contribution is acceptable.
     *
     *  This is a non-virtual convenience overload which forwards to
     *  skip_to(Xapian::docid, double) with a w_min of 0.0.  The same
     *  name-hiding caveat as for next() applies.
     */
    Internal * skip_to(Xapian::docid did) { return skip_to(did, 0.0); }

    /** Count the number of leaf subqueries which match at the current
     *  position.
     *
     *  This method is not pure virtual, so a default implementation exists
     *  outside this header.
     */
    virtual Xapian::termcount count_matching_subqs() const;

    /** Gather PositionList* objects for a subtree.
     *
     *  This method is not pure virtual, so a default implementation exists
     *  outside this header.
     *
     *  @param orposlist	NOTE(review): presumably the OrPositionList
     *			which the gathered PositionList objects are added
     *			to - confirm against the implementation.
     */
    virtual void gather_position_lists(OrPositionList* orposlist);

    /// Return a string description of this object.
    virtual std::string get_description() const = 0;
};

// In the external API headers, this class is Xapian::PostingIterator::Internal,
// but in the library code it's still known as "PostList" in most places (the
// documentation comments in this header use that name too), so provide the
// short name as an alias for the same type.
typedef Xapian::PostingIterator::Internal PostList;

#endif // XAPIAN_INCLUDED_POSTLIST_H