File: terminfo.h

package info (click to toggle)
xapian-core 2.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 25,008 kB
  • sloc: cpp: 136,717; ansic: 11,798; sh: 5,416; perl: 1,024; javascript: 551; makefile: 460; tcl: 299; python: 40
file content (177 lines) | stat: -rw-r--r-- 5,258 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/** @file
 * @brief Metadata for a term in a document
 */
/* Copyright 2017,2018,2019 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see
 * <https://www.gnu.org/licenses/>.
 */

#ifndef XAPIAN_INCLUDED_TERMINFO_H
#define XAPIAN_INCLUDED_TERMINFO_H

#include "api/smallvector.h"
#include <xapian/types.h>

/** Metadata for a term in a document: its wdf and its positions.
 *
 *  An entry can be flagged as deleted instead of being physically removed
 *  (see is_deleted()), which avoids invalidating existing TermIterator
 *  objects.
 */
class TermInfo {
    /// Within-document frequency of the term.
    Xapian::termcount wdf;

    /** Boundary between the two sorted ranges of @a positions, or 0.
     *
     *  Inserting positions is more efficient if they are allowed to be held
     *  as two sorted ranges.  When that is the case split is > 0 and the
     *  ranges are [0, split) and [split, positions.size()).
     *
     *  A value of 0 means [0, positions.size()) is a single sorted range.
     *
     *  As a special case, split > 0 together with positions.empty() flags the
     *  term as deleted (so terms can be deleted without invalidating existing
     *  TermIterator objects).
     *
     *  The type is unsigned to avoid bloating this structure: more than
     *  4 billion positions in one document is not sensible (and not possible
     *  unless termpos is configured to be 64 bit).
     *
     *  Declared mutable - presumably so that merge(), which is a const
     *  member, can update it.
     */
    mutable unsigned split = 0;

    /** Positions at which the term occurs.
     *
     *  Entries are kept in strictly increasing order, which means duplicate
     *  entries are not allowed.
     */
    mutable Xapian::VecCOW<Xapian::termpos> positions;

    /** Merge the sorted ranges before and after @a split.
     *
     *  Defined out of line.
     */
    void merge() const;

  public:
    /** Construct an entry with no positions.
     *
     *  @param wdf_   Within-document frequency
     */
    explicit TermInfo(Xapian::termcount wdf_)
	: wdf(wdf_)
    {
    }

    /** Construct an entry which starts with one position.
     *
     *  @param wdf_     Within-document frequency
     *  @param termpos  Position to add
     */
    TermInfo(Xapian::termcount wdf_, Xapian::termpos termpos)
	: wdf(wdf_)
    {
	positions.push_back(termpos);
    }

    /** Get a pointer to the positions.
     *
     *  If the positions are currently held as two ranges, merge() is called
     *  first.
     */
    const Xapian::VecCOW<Xapian::termpos>* get_positions() const {
	if (split != 0) {
	    merge();
	}
	return &positions;
    }

    /// Test if any positions are stored.
    bool has_positions() const {
	return !positions.empty();
    }

    /// Get the number of positions stored.
    size_t count_positions() const {
	return positions.size();
    }

    /// Get the within-document frequency.
    Xapian::termcount get_wdf() const {
	return wdf;
    }

    /** Increase within-document frequency.
     *
     *  If the term was flagged as deleted, the flag is cleared and the wdf
     *  is set to @a delta rather than being added to.
     *
     *  @param delta  Amount to increase the wdf by
     *
     *  @return true if the term was flagged as deleted before the operation.
     */
    bool increase_wdf(Xapian::termcount delta) {
	const bool was_deleted = is_deleted();
	if (rare(was_deleted)) {
	    // Bring the entry back to life, discarding the old wdf.
	    split = 0;
	    wdf = delta;
	} else {
	    wdf += delta;
	}
	return was_deleted;
    }

    /** Decrease within-document frequency.
     *
     *  The subtraction saturates, so the wdf never goes below zero.  If the
     *  result is zero and there are no positions, the term is flagged as
     *  deleted.
     *
     *  @param delta  Amount to decrease the wdf by
     *
     *  @return true If the adjusted wdf is zero and there are no positions.
     */
    bool decrease_wdf(Xapian::termcount delta) {
	// Saturating arithmetic - clamp at zero instead of wrapping.
	wdf = (delta <= wdf) ? wdf - delta : 0;

	// With neither wdf nor positions left, flag the term as deleted.
	const bool now_unused = (wdf == 0 && positions.empty());
	if (now_unused) {
	    split = 1;
	}
	return now_unused;
    }

    /** Flag the term as deleted, discarding any positions.
     *
     *  The stored wdf is left as it is.
     *
     *  @return false if the term was already flagged as deleted (in which
     *	    case nothing is changed), otherwise true.
     */
    bool remove() {
	const bool was_live = !is_deleted();
	if (was_live) {
	    positions.clear();
	    // No positions and split > 0 is how a deleted term is flagged.
	    split = 1;
	}
	return was_live;
    }

    /** Add a position.
     *
     *  This is a no-op if @a termpos is already present.
     *
     *  @param wdf_inc  wdf increment
     *  @param termpos  Position to add
     *
     *  @return true if the term was flagged as deleted before the operation.
     */
    bool add_position(Xapian::termcount wdf_inc, Xapian::termpos termpos);

    /** Append a position.
     *
     *  The position must be >= the largest currently in the list.  This is
     *  not checked here, and neither wdf nor split is modified.
     *
     *  NOTE(review): positions are documented as strictly increasing, so
     *  passing a position equal to the largest would presumably store a
     *  duplicate - confirm callers never do that.
     *
     *  @param termpos  Position to append
     */
    void append_position(Xapian::termpos termpos) {
	positions.push_back(termpos);
    }

    /** Remove a position.
     *
     *  @param termpos  Position to remove
     *
     *  @return If @a termpos wasn't present, returns false.
     */
    bool remove_position(Xapian::termpos termpos);

    /** Remove a range of positions.
     *
     *  It's OK if there are no positions in the specified range.
     *
     *  @param termpos_first  First position to remove
     *  @param termpos_last   Last position to remove
     *
     *  @return the number of positions removed.
     */
    Xapian::termpos remove_positions(Xapian::termpos termpos_first,
				     Xapian::termpos termpos_last);

    /** Has this term been deleted from this document?
     *
     *  Entries are flagged as deleted (no positions and split > 0) instead of
     *  actually being deleted, to avoid invalidating existing TermIterator
     *  objects.
     */
    bool is_deleted() const {
	return split > 0 && positions.empty();
    }
};

#endif // XAPIAN_INCLUDED_TERMINFO_H