File: databaseinternal.h

package info (click to toggle)
xapian-core 1.5.2-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 25,276 kB
  • sloc: cpp: 136,717; ansic: 11,798; sh: 5,028; perl: 1,024; javascript: 551; makefile: 460; tcl: 299; python: 40
file content (526 lines) | stat: -rw-r--r-- 18,230 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
/** @file
 * @brief Virtual base class for Database internals
 */
/* Copyright 2004-2024 Olly Betts
 * Copyright 2007,2008 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see
 * <https://www.gnu.org/licenses/>.
 */

#ifndef XAPIAN_INCLUDED_DATABASEINTERNAL_H
#define XAPIAN_INCLUDED_DATABASEINTERNAL_H

#include "internaltypes.h"

#include <xapian/database.h>
#include <xapian/document.h>
#include <xapian/intrusive_ptr.h>
#include <xapian/positioniterator.h>
#include <xapian/postingiterator.h>
#include <xapian/termiterator.h>
#include <xapian/types.h>
#include <xapian/valueiterator.h>

#include <string>
#include <string_view>

// Short global aliases for the internal classes behind the public iterator
// types; the Database::Internal interface below returns pointers to these.
typedef Xapian::TermIterator::Internal TermList;
typedef Xapian::PositionIterator::Internal PositionList;
typedef Xapian::ValueIterator::Internal ValueList;

// Only used via pointer in this header (see open_leaf_post_list()), so a
// forward declaration is sufficient here.
class LeafPostList;

// Forward declare Xapian::Internal::PostList and make it available in the
// global namespace under the unqualified name PostList.
namespace Xapian {
namespace Internal {
class PostList;
}
}
using Xapian::Internal::PostList;

namespace Xapian {

class Query;
struct ReplicationInfo;

/** Virtual base class for Database internals.
 *
 *  Each backend derives from this class and implements the pure virtual
 *  methods declared here.  Virtual methods which are not pure have an
 *  out-of-line default implementation which a backend may override.
 *
 *  Objects are reference counted via Xapian::Internal::intrusive_base and
 *  are neither copyable nor assignable.
 */
class Database::Internal : public Xapian::Internal::intrusive_base {
    friend class Database;

    /// Don't allow assignment.
    Internal& operator=(const Internal&) = delete;

    /// Don't allow copying.
    Internal(const Internal&) = delete;

    /// The "action required" helper for the dtor_called() helper.
    void dtor_called_();

  protected:
    /** Transaction state enum.
     *
     *  The negative values mark shards for which dtor_called() has nothing
     *  to do; values greater than zero mean a transaction is active.
     */
    enum transaction_state {
	TRANSACTION_READONLY = -2, // Not a writable database shard.
	TRANSACTION_UNIMPLEMENTED = -1, // Used by InMemory.
	TRANSACTION_NONE = 0,
	TRANSACTION_UNFLUSHED = 1,
	TRANSACTION_FLUSHED = 2
    };

    /** Only constructable as a base class for derived classes.
     *
     *  Marked explicit so a transaction_state value is never considered for
     *  an implicit conversion to this class.
     *
     *  @param transaction_support  One of:
     *	* TRANSACTION_READONLY - read-only shard
     *	* TRANSACTION_UNIMPLEMENTED - writable but no transaction support
     *	* TRANSACTION_NONE - writable with transaction support
     */
    explicit Internal(transaction_state transaction_support)
	: state(transaction_support) {}

    /// Current transaction state.
    transaction_state state;

    /// Test if this shard is read-only.
    bool is_read_only() const {
	return state == TRANSACTION_READONLY;
    }

    /** Test if a transaction is currently active.
     *
     *  This is the case when the state is TRANSACTION_UNFLUSHED or
     *  TRANSACTION_FLUSHED.
     */
    bool transaction_active() const { return state > 0; }

    /** Helper to process uncommitted changes when a writable db is destroyed.
     *
     *  The destructor of a derived writable database class needs to call this
     *  method - we can't call it from our own destructor because we need to
     *  be able to call methods in the derived class, but that's no longer
     *  valid by the time our destructor runs, as that happens after the
     *  destructor of the derived class has run.
     *
     *  If a transaction is active, it is cancelled.  Otherwise we attempt to
     *  commit uncommitted changes, but because it is not safe to throw
     *  exceptions from destructors, this method will catch and discard any
     *  exceptions.
     */
    void dtor_called() {
	// Inline the check to exclude no-op cases (read-only and unimplemented).
	if (state >= 0)
	    dtor_called_();
    }

  public:
    /** We have virtual methods and want to be able to delete derived classes
     *  using a pointer to the base class, so we need a virtual destructor.
     */
    virtual ~Internal() = default;

    /// Type returned by size().
    typedef Xapian::doccount size_type;

    /** Return the size of this object.
     *
     *  NOTE(review): presumably the number of shards this object
     *  represents - confirm against the implementation.
     */
    virtual size_type size() const;

    /** Keep the database connection alive.
     *
     *  NOTE(review): presumably the internal method behind
     *  Database::keep_alive() - confirm against the implementation.
     */
    virtual void keep_alive();

    /** Hint that @a query is about to be run.
     *
     *  NOTE(review): presumably allows a backend to start reading ahead the
     *  data the query will need - confirm against the implementation.
     *
     *  @param query	The query which is going to be run.
     */
    virtual void readahead_for_query(const Query& query) const;

    /** Return the number of documents in this database. */
    virtual doccount get_doccount() const = 0;

    /** Return the last used document id of this (sub) database. */
    virtual docid get_lastdocid() const = 0;

    /** Return the total length of all documents in this database. */
    virtual totallength get_total_length() const = 0;

    /** Get the length of a document.
     *
     *  @param did  The document id of the document to return this value for.
     */
    virtual termcount get_doclength(docid did) const = 0;

    /** Get the number of unique terms in document.
     *
     *  @param did  The document id of the document to return this value for.
     */
    virtual termcount get_unique_terms(docid did) const = 0;

    /** Get the max wdf in document.
     *
     *  @param did  The document id of the document to return this value for.
     */
    virtual termcount get_wdfdocmax(docid did) const = 0;

    /** Returns frequencies for a term.
     *
     *  @param term		The term to get frequencies for
     *  @param termfreq_ptr	Pointer used to return the number of docs
     *				indexed by @a term (or NULL not to return)
     *  @param collfreq_ptr	Pointer used to return the number of
     *				occurrences of @a term in the database (or
     *				NULL not to return)
     */
    virtual void get_freqs(std::string_view term,
			   doccount* termfreq_ptr,
			   termcount* collfreq_ptr) const = 0;

    /** Return the frequency of a given value slot.
     *
     *  This is the number of documents which have a (non-empty) value stored
     *  in the slot.
     *
     *  @param slot The value slot to examine.
     */
    virtual doccount get_value_freq(valueno slot) const = 0;

    /** Get a lower bound on the values stored in the given value slot.
     *
     *  If there are no values stored in the given value slot, this will return
     *  an empty string.
     *
     *  @param slot The value slot to examine.
     */
    virtual std::string get_value_lower_bound(valueno slot) const = 0;

    /** Get an upper bound on the values stored in the given value slot.
     *
     *  If there are no values stored in the given value slot, this will return
     *  an empty string.
     *
     *  @param slot The value slot to examine.
     */
    virtual std::string get_value_upper_bound(valueno slot) const = 0;

    /** Get a lower bound on the length of a document in this DB.
     *
     *  This bound does not include any zero-length documents.
     */
    virtual termcount get_doclength_lower_bound() const = 0;

    /// Get an upper bound on the length of a document in this DB.
    virtual termcount get_doclength_upper_bound() const = 0;

    /// Get an upper bound on the wdf of term @a term.
    virtual termcount get_wdf_upper_bound(std::string_view term) const = 0;

    /// Get a lower bound on the unique terms size of a document in this DB.
    virtual termcount get_unique_terms_lower_bound() const;

    /// Get an upper bound on the unique terms size of a document in this DB.
    virtual termcount get_unique_terms_upper_bound() const;

    /** Check whether @a term exists in this database. */
    virtual bool term_exists(std::string_view term) const = 0;

    /** Check whether this database contains any positional information. */
    virtual bool has_positions() const = 0;

    /** Return a PostList suitable for use in a PostingIterator. */
    virtual PostList* open_post_list(std::string_view term) const = 0;

    /** Create a LeafPostList for use during a match.
     *
     *  @param term		The term to open a postlist for, or the empty
     *				string to create an all-docs postlist.
     *
     *  @param need_read_pos	Does the postlist need to support
     *				read_position_list()?  Note that
     *				open_position_list() may still be called even
     *				if need_read_pos is false.
     */
    virtual LeafPostList* open_leaf_post_list(std::string_view term,
					      bool need_read_pos) const = 0;

    /** Open a value stream.
     *
     *  This returns the value in a particular slot for each document.
     *
     *  @param slot	The value slot.
     *
     *  @return	Pointer to a new ValueList object which should be deleted by
     *		the caller once it is no longer needed.
     */
    virtual ValueList* open_value_list(valueno slot) const;

    /** Open a TermList for a document.
     *
     *  @param did	The document id of the document to open a TermList for.
     */
    virtual TermList* open_term_list(docid did) const = 0;

    /** Like open_term_list() but without MultiTermList wrapper.
     *
     *  MultiDatabase::open_term_list() wraps the returned TermList in a
     *  MultiTermList, but we don't want that for query expansion.
     */
    virtual TermList* open_term_list_direct(docid did) const = 0;

    /** Open a TermList over the terms in this database.
     *
     *  @param prefix	NOTE(review): presumably restricts the terms returned
     *			to those starting with this prefix (with an empty
     *			prefix meaning all terms) - confirm against the
     *			implementations.
     */
    virtual TermList* open_allterms(std::string_view prefix) const = 0;

    /** Open a PositionList for a term in a document.
     *
     *  @param did	The document id of the document.
     *  @param term	The term to open a PositionList for.
     */
    virtual PositionList* open_position_list(docid did,
					     std::string_view term) const = 0;

    /** Open a handle on a document.
     *
     *  The returned handle provides access to document data and document
     *  values.
     *
     *  @param did	The document id to open.
     *
     *  @param lazy	If true, there's no need to check that this document
     *			actually exists (only a hint - the backend may still
     *			check).  Used to avoid unnecessary work when we already
     *			know that the requested document exists.
     *
     *  @return		A new document object, owned by the caller.
     */
    virtual Document::Internal* open_document(docid did, bool lazy) const = 0;

    /** Create a termlist tree from trigrams of @a word.
     *
     *  You can assume word.size() > 1.
     *
     *  If there are no trigrams, returns NULL.
     */
    virtual TermList* open_spelling_termlist(std::string_view word) const;

    /** Return a termlist which returns the words which are spelling
     *  correction targets.
     *
     *  If there are no spelling correction targets, returns NULL.
     */
    virtual TermList* open_spelling_wordlist() const;

    /** Return the number of times @a word was added as a spelling. */
    virtual doccount get_spelling_frequency(std::string_view word) const;

    /** Add a word to the spelling dictionary.
     *
     *  If the word is already present, its frequency is increased.
     *
     *  @param word	The word to add.
     *  @param freqinc	How much to increase its frequency by.
     */
    virtual void add_spelling(std::string_view word,
			      termcount freqinc) const;

    /** Remove a word from the spelling dictionary.
     *
     *  The word's frequency is decreased, and if it would become zero or
     *  less then the word is removed completely.
     *
     *  @param word	The word to remove.
     *  @param freqdec	How much to decrease its frequency by.
     *
     *  @return Any freqdec not "used up".
     */
    virtual termcount remove_spelling(std::string_view word,
				      termcount freqdec) const;

    /** Open a termlist returning synonyms for a term.
     *
     *  If @a term has no synonyms, returns NULL.
     */
    virtual TermList* open_synonym_termlist(std::string_view term) const;

    /** Open a termlist returning each term which has synonyms.
     *
     *  @param prefix   If non-empty, only terms with this prefix are
     *		    returned.
     */
    virtual TermList* open_synonym_keylist(std::string_view prefix) const;

    /** Add a synonym for a term.
     *
     *  If @a synonym is already a synonym for @a term, then no action is
     *  taken.
     */
    virtual void add_synonym(std::string_view term,
			     std::string_view synonym) const;

    /** Remove a synonym for a term.
     *
     *  If @a synonym isn't a synonym for @a term, then no action is taken.
     */
    virtual void remove_synonym(std::string_view term,
				std::string_view synonym) const;

    /** Clear all synonyms for a term.
     *
     *  If @a term has no synonyms, no action is taken.
     */
    virtual void clear_synonyms(std::string_view term) const;

    /** Get the metadata associated with a given key.
     *
     *  See Database::get_metadata() for more information.
     */
    virtual std::string get_metadata(std::string_view key) const;

    /** Open a termlist returning each metadata key.
     *
     *  Only metadata keys which are associated with a non-empty value will
     *  be returned.
     *
     *  @param prefix   If non-empty, only keys with this prefix are returned.
     */
    virtual TermList* open_metadata_keylist(std::string_view prefix) const;

    /** Set the metadata associated with a given key.
     *
     *  See WritableDatabase::set_metadata() for more information.
     */
    virtual void set_metadata(std::string_view key, std::string_view value);

    /** Reopen the database to the latest available revision.
     *
     *  Database backends which don't support simultaneous update and
     *  reading probably don't need to do anything here.
     */
    virtual bool reopen();

    /** Close the database */
    virtual void close() = 0;

    /** Commit pending modifications to the database. */
    virtual void commit();

    /** Cancel pending modifications to the database. */
    virtual void cancel();

    /** Begin transaction.
     *
     *  @param flushed	NOTE(review): presumably selects between the
     *			TRANSACTION_FLUSHED and TRANSACTION_UNFLUSHED states
     *			- confirm against the implementation.
     */
    virtual void begin_transaction(bool flushed);

    /** End transaction.
     *
     *  @param do_commit	If true, commits the transaction; if false,
     *				cancels the transaction.
     */
    virtual void end_transaction(bool do_commit);

    /** Add a document to the database.
     *
     *  @param document	The document to add.
     *
     *  @return	A document id (presumably the one allocated to the newly
     *		added document - confirm against the implementations).
     */
    virtual docid add_document(const Document& document);

    /** Delete a document from the database.
     *
     *  @param did	The document id of the document to delete.
     */
    virtual void delete_document(docid did);

    /** Delete any documents indexed by a term from the database. */
    virtual void delete_document(std::string_view unique_term);

    /** Replace a document in the database.
     *
     *  @param did	The document id of the document to replace.
     *  @param document	The new document.
     */
    virtual void replace_document(docid did,
				  const Document& document);

    /** Replace any documents matching a term. */
    virtual docid replace_document(std::string_view unique_term,
				   const Document& document);

    /** Request a document.
     *
     *  This tells the database that we're going to want a particular
     *  document soon.  It's just a hint which the backend may ignore,
     *  but for glass it issues a preread hint on the file with the
     *  document data in, and for the remote backend it might cause
     *  the document to be fetched asynchronously (this isn't currently
     *  implemented though).
     *
     *  It can be called for multiple documents in turn, and a common usage
     *  pattern would be to iterate over an MSet and request the documents,
     *  then iterate over it again to actually get and display them.
     *
     *  The default implementation is a no-op.
     */
    virtual void request_document(docid did) const;

    /** Write a set of changesets to a file descriptor.
     *
     *  This call may reopen the database, leaving it pointing to a more
     *  recent version of the database.
     */
    virtual void write_changesets_to_fd(int fd,
					std::string_view start_revision,
					bool need_whole_db,
					ReplicationInfo* info);

    /// Get revision number of database (if meaningful).
    virtual Xapian::rev get_revision() const;

    /** Get a UUID for the database.
     *
     *  The UUID will persist for the lifetime of the database.
     *
     *  Replicas (eg, made with the replication protocol, or by copying all
     *  the database files) will have the same UUID.  However, copies (made
     *  with copydatabase, or xapian-compact) will have different UUIDs.
     *
     *  If the backend does not support UUIDs the empty string is returned.
     */
    virtual std::string get_uuid() const;

    /** Notify the database that document is no longer valid.
     *
     *  This is used to invalidate references to a document kept by a
     *  database for doing lazy updates.  If we moved to using a weak_ptr
     *  instead we wouldn't need a special method for this, but it would
     *  involve a fair bit of reorganising of other parts of the code.
     */
    virtual void invalidate_doc_object(Document::Internal* obj) const;

    /** Get backend information about this database.
     *
     *  @param path	If non-NULL, set the pointed-to string to the file
     *			path of this database (or to some string describing
     *			the database in a backend-specified format if "path"
     *			isn't a concept which makes sense).
     *
     *  @return	A constant indicating the backend type.
     */
    virtual int get_backend_info(std::string* path) const = 0;

    /** Find lowest and highest docids actually in use.
     *
     *  Used during local matching and compaction, so only needs to be
     *  implemented by backends which support one or both of these.
     *
     *  For example, EmptyDatabase, MultiDatabase and RemoteDatabase don't need
     *  to implement this (empty shards are skipped early by the matcher;
     *  sharded databases are handled explicitly by the matcher rather than via
     *  the "multi" backend; matching for remote shards runs as a local match
     *  on the remote).
     */
    virtual void get_used_docid_range(docid& first,
				      docid& last) const;

    /** Return true if the database is open for writing.
     *
     *  If this is a WritableDatabase, always returns true.
     *
     *  For a Database, test if there's a writer holding the lock (or if
     *  we can't test for a lock without taking it on the current platform,
     *  throw Xapian::UnimplementedError).
     */
    virtual bool locked() const;

    /** Lock a read-only database for writing or unlock a writable database.
     *
     *  This is the internal method behind Database::lock() and
     *  Database::unlock().
     *
     *  In the unlocking case, the writable database is closed.  In the
     *  locking case, the read-only database is left open.
     *
     *  @param flags  Xapian::DB_READONLY_ to unlock, otherwise the flags
     *		      to use when opening for writing.
     *
     *  @return  The new Database::Internal object (or the current object
     *		 if no action is required - e.g. unlock on a read-only
     *		 database).
     */
    virtual Internal* update_lock(int flags);

    /** Reconstruct text for a document.
     *
     *  NOTE(review): presumably the internal method behind
     *  Database::reconstruct_text() - see there for the meaning of the
     *  parameters; confirm against the implementation.
     */
    virtual std::string reconstruct_text(Xapian::docid did,
					 size_t length,
					 std::string_view prefix,
					 Xapian::termpos start_pos,
					 Xapian::termpos end_pos) const;

    /// Return a string describing this object.
    virtual std::string get_description() const = 0;
};

}

#endif // XAPIAN_INCLUDED_DATABASEINTERNAL_H