File: glass_spelling.h

package info (click to toggle)
xapian-core 1.4.3-2%2Bdeb9u3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 21,412 kB
  • sloc: cpp: 113,868; ansic: 8,723; sh: 4,433; perl: 836; makefile: 566; tcl: 317; python: 40
file content (186 lines) | stat: -rw-r--r-- 5,289 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
/** @file glass_spelling.h
 * @brief Spelling correction data for a glass database.
 */
/* Copyright (C) 2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */

#ifndef XAPIAN_INCLUDED_GLASS_SPELLING_H
#define XAPIAN_INCLUDED_GLASS_SPELLING_H

#include <xapian/types.h>

#include "glass_lazytable.h"
#include "api/termlist.h"

#include <map>
#include <set>
#include <string>
#include <cstring> // For memcpy() and memcmp().

namespace Glass {

class RootInfo;

struct fragment {
    char data[4];

    // Default constructor.
    fragment() { }

    // Allow implicit conversion.
    explicit fragment(char data_[4]) { std::memcpy(data, data_, 4); }

    char & operator[] (unsigned i) { return data[i]; }
    const char & operator[] (unsigned i) const { return data[i]; }

    operator std::string () const {
	return std::string(data, data[0] == 'M' ? 4 : 3);
    }

    bool operator<(const fragment &b) const {
	return std::memcmp(data, b.data, 4) < 0;
    }
};

}

using Glass::RootInfo;

class GlassSpellingTable : public GlassLazyTable {
    void toggle_word(const std::string & word);
    void toggle_fragment(Glass::fragment frag, const std::string & word);

    std::map<std::string, Xapian::termcount> wordfreq_changes;

    /** Changes to make to the termlists.
     *
     *  This list is essentially xor-ed with the list on disk, so an entry
     *  here either means a new entry needs to be added on disk, or an
     *  existing entry on disk needs to be removed.  We do it this way so
     *  we don't need to store an additional add/remove flag for every
     *  word.
     */
    std::map<Glass::fragment, std::set<std::string> > termlist_deltas;

    /** Used to track an upper bound on wordfreq. */
    Xapian::termcount wordfreq_upper_bound = 0;

  public:
    /** Create a new GlassSpellingTable object.
     *
     *  This method does not create or open the table on disk - you
     *  must call the create() or open() methods respectively!
     *
     *  @param dbdir		The directory the glass database is stored in.
     *  @param readonly		true if we're opening read-only, else false.
     */
    GlassSpellingTable(const std::string & dbdir, bool readonly)
	: GlassLazyTable("spelling", dbdir + "/spelling.", readonly) { }

    GlassSpellingTable(int fd, off_t offset_, bool readonly)
	: GlassLazyTable("spelling", fd, offset_, readonly) { }

    /** Merge in batched-up changes.
     *
     *  @return Updated upperbound on the word frequency.
     */
    void merge_changes();

    void add_word(const std::string & word, Xapian::termcount freqinc);
    void remove_word(const std::string & word, Xapian::termcount freqdec);

    TermList * open_termlist(const std::string & word);

    Xapian::doccount get_word_frequency(const std::string & word) const;

    void set_wordfreq_upper_bound(Xapian::termcount ub) {
	wordfreq_upper_bound = ub;
    }

    /** Override methods of GlassTable.
     *
     *  NB: these aren't virtual, but we always call them on the subclass in
     *  cases where it matters.
     *  @{
     */

    bool is_modified() const {
	return !wordfreq_changes.empty() || GlassTable::is_modified();
    }

    /** Returns updated wordfreq upper bound. */
    Xapian::termcount flush_db() {
	merge_changes();
	GlassTable::flush_db();
	return wordfreq_upper_bound;
    }

    void cancel(const RootInfo & root_info, glass_revision_number_t rev) {
	// Discard batched-up changes.
	wordfreq_changes.clear();
	termlist_deltas.clear();

	GlassTable::cancel(root_info, rev);
    }

    // @}
};

/** The list of words containing a particular trigram. */
class GlassSpellingTermList : public TermList {
    /// The encoded data.
    std::string data;

    /// Position in the data.
    unsigned p;

    /// The current term.
    std::string current_term;

    /// Copying is not allowed.
    GlassSpellingTermList(const GlassSpellingTermList &);

    /// Assignment is not allowed.
    void operator=(const GlassSpellingTermList &);

  public:
    /// Constructor.
    explicit GlassSpellingTermList(const std::string & data_)
	: data(data_), p(0) { }

    Xapian::termcount get_approx_size() const;

    std::string get_termname() const;

    Xapian::termcount get_wdf() const;

    Xapian::doccount get_termfreq() const;

    Xapian::termcount get_collection_freq() const;

    TermList * next();

    TermList * skip_to(const std::string & term);

    bool at_end() const;

    Xapian::termcount positionlist_count() const;

    Xapian::PositionIterator positionlist_begin() const;
};

#endif // XAPIAN_INCLUDED_GLASS_SPELLING_H