1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
|
/** @file glass_postlist.h
* @brief Postlists in glass databases
*/
/* Copyright 1999,2000,2001 BrightStation PLC
* Copyright 2002 Ananova Ltd
* Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015 Olly Betts
* Copyright 2007,2009 Lemur Consulting Ltd
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
#ifndef OM_HGUARD_GLASS_POSTLIST_H
#define OM_HGUARD_GLASS_POSTLIST_H
#include <xapian/database.h>
#include "glass_defs.h"
#include "glass_inverter.h"
#include "glass_positionlist.h"
#include "api/leafpostlist.h"
#include "omassert.h"
#include "autoptr.h"
#include <map>
#include <string>
using namespace std;
// Forward declarations: within this header these types are only used through
// pointers, references and smart-pointer template arguments.
class GlassCursor;
class GlassDatabase;
namespace Glass {
class PostlistChunkReader;
class PostlistChunkWriter;
class RootInfo;
}
// Allow RootInfo to be named below without the Glass:: qualifier.
using Glass::RootInfo;
// Declared early because GlassPostListTable holds an AutoPtr<GlassPostList>
// member, while GlassPostList itself is only defined further down.
class GlassPostList;
class GlassPostListTable : public GlassTable {
/// PostList for looking up document lengths.
mutable AutoPtr<GlassPostList> doclen_pl;
public:
/** Create a new table object.
*
* This does not create the table on disk - the create() method must
* be called before the table is created on disk
*
* This also does not open the table - the open() method must be
* called before use is made of the table.
*
* @param path_ - Path at which the table is stored.
* @param readonly_ - whether to open the table for read only
* access.
*/
GlassPostListTable(const string & path_, bool readonly_)
: GlassTable("postlist", path_ + "/postlist.", readonly_),
doclen_pl()
{ }
GlassPostListTable(int fd, off_t offset_, bool readonly_)
: GlassTable("postlist", fd, offset_, readonly_),
doclen_pl()
{ }
void open(int flags_, const RootInfo & root_info,
glass_revision_number_t rev) {
doclen_pl.reset(0);
GlassTable::open(flags_, root_info, rev);
}
/// Merge changes for a term.
void merge_changes(const string &term, const Inverter::PostingChanges & changes);
/// Merge document length changes.
void merge_doclen_changes(const map<Xapian::docid, Xapian::termcount> & doclens);
Xapian::docid get_chunk(const string &tname,
Xapian::docid did, bool adding,
Glass::PostlistChunkReader ** from,
Glass::PostlistChunkWriter **to);
/// Compose a key from a termname and docid.
static string make_key(const string & term, Xapian::docid did) {
return pack_glass_postlist_key(term, did);
}
/// Compose a key from a termname.
static string make_key(const string & term) {
return pack_glass_postlist_key(term);
}
bool term_exists(const string & term) const {
return key_exists(make_key(term));
}
/** Returns frequencies for a term.
*
* @param term The term to get frequencies for
* @param termfreq_ptr Point to return number of docs indexed by @a
* term (or NULL not to return)
* @param collfreq_ptr Point to return number of occurrences of @a
* term in the database (or NULL not to return)
*/
void get_freqs(const std::string & term,
Xapian::doccount * termfreq_ptr,
Xapian::termcount * collfreq_ptr) const;
/** Returns the length of document @a did. */
Xapian::termcount get_doclength(Xapian::docid did,
Xapian::Internal::intrusive_ptr<const GlassDatabase> db) const;
/** Check if document @a did exists. */
bool document_exists(Xapian::docid did,
Xapian::Internal::intrusive_ptr<const GlassDatabase> db) const;
void get_used_docid_range(Xapian::docid & first,
Xapian::docid & last) const;
};
/** Posting list for a term stored in a glass database.
 *
 *  Copying and assignment are disabled: both operations are declared
 *  private.
 */
class GlassPostList : public LeafPostList {
    /** The database being searched.
     *
     *  Holding this pointer stops the database being deleted before this
     *  object, and also provides access to the position_table.
     */
    Xapian::Internal::intrusive_ptr<const GlassDatabase> this_db;

    /// Position list object belonging to this posting list.
    GlassPositionList positionlist;

    /// True once reading of the list has started.
    bool have_started;

    /// True if the current chunk is the last one.
    bool is_last_chunk;

    /// True once we have run off the end of the list.
    bool is_at_end;

    /// Cursor pointing at the current chunk of the postlist.
    AutoPtr<GlassCursor> cursor;

    /// First document id in the current chunk.
    Xapian::docid first_did_in_chunk;

    /// Last document id in the current chunk.
    Xapian::docid last_did_in_chunk;

    /// Current read position within the chunk.
    const char * pos;

    /// One past the final byte of the current chunk.
    const char * end;

    /// Document id of the current entry.
    Xapian::docid did;

    /// wdf of the current document.
    Xapian::termcount wdf;

    /// Number of entries in the posting list.
    Xapian::doccount number_of_entries;

    /// Private copy constructor: copying is not allowed.
    GlassPostList(const GlassPostList &);

    /// Private assignment operator: assignment is not allowed.
    void operator=(const GlassPostList &);

    /** Advance to the next item in the current chunk, if there is one.
     *
     *  @return false if already at the end of the chunk.
     */
    bool next_in_chunk();

    /** Advance to the next chunk.
     *
     *  When the postlist has no further chunks, is_at_end is set to true.
     */
    void next_chunk();

    /** Test whether @a desired_did falls inside the docid range covered by
     *  the current chunk.
     *
     *  A true result does not mean the document ID is actually present.
     *  The result is false when the document ID is greater than the last
     *  document ID in the chunk, even if it is below the first document ID
     *  of the next chunk: a document ID may be covered by no chunk at all.
     */
    bool current_chunk_contains(Xapian::docid desired_did);

    /** Move to the chunk containing the specified document ID.
     *
     *  The chunk selected is the one whose starting document ID is
     *  <= desired_did while the following chunk's starting document ID is
     *  > desired_did.
     *
     *  Consequently current_chunk_contains(desired_did) may still return
     *  false afterwards, because the document ID can lie after the end of
     *  this chunk yet before the start of the next one.
     */
    void move_to_chunk_containing(Xapian::docid desired_did);

    /** Scan forwards through the current chunk for a document ID.
     *
     *  Particularly efficient when the desired document ID is greater than
     *  the last one in the chunk, since it then skips straight to the end.
     *
     *  @return true if we moved to a valid document,
     *          false if we reached the end of the chunk.
     */
    bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did);

    /** Private constructor taking a caller-supplied cursor.
     *
     *  NOTE(review): presumably @a cursor_ ends up owned by the `cursor`
     *  member - confirm in the implementation file.
     */
    GlassPostList(Xapian::Internal::intrusive_ptr<const GlassDatabase> this_db_,
                  const std::string & term,
                  GlassCursor * cursor_);

    /** Initialisation helper, defined in the implementation file.
     *
     *  NOTE(review): presumably shared by the constructors - confirm.
     */
    void init();

  public:
    /** Construct a postlist for @a term in database @a this_db_.
     *
     *  NOTE(review): @a keep_reference presumably controls whether the
     *  this_db member keeps the database referenced - confirm in the
     *  implementation file.
     */
    GlassPostList(Xapian::Internal::intrusive_ptr<const GlassDatabase> this_db_,
                  const std::string & term,
                  bool keep_reference);

    /// Destructor.
    ~GlassPostList();

    /// Open a postlist for the term @a term_.
    LeafPostList * open_nearby_postlist(const std::string & term_) const;

    /** Used for looking up doclens.
     *
     *  @return true if docid @a desired_did has a document length.
     */
    bool jump_to(Xapian::docid desired_did);

    /** Number of documents indexed by this term.
     *
     *  This is the length of the postlist.
     */
    Xapian::doccount get_termfreq() const
    {
        return number_of_entries;
    }

    /// The current docid; asserts that reading has started.
    Xapian::docid get_docid() const
    {
        Assert(have_started);
        return did;
    }

    /// The length of the current document.
    Xapian::termcount get_doclength() const;

    /// The number of unique terms in the current document.
    Xapian::termcount get_unique_terms() const;

    /** The Within Document Frequency of the term in the current document;
     *  asserts that reading has started.
     */
    Xapian::termcount get_wdf() const
    {
        Assert(have_started);
        return wdf;
    }

    /** Get the list of positions of the term in the current document.
     *
     *  Non-const variant.
     */
    PositionList *read_position_list();

    /** Get the list of positions of the term in the current document.
     *
     *  Const variant.
     */
    PositionList * open_position_list() const;

    /// Advance to the next document.
    PostList * next(double w_min);

    /// Skip forward to the next document with docid >= @a desired_did.
    PostList * skip_to(Xapian::docid desired_did, double w_min);

    /// True if and only if we have run off the end of the list.
    bool at_end() const
    {
        return is_at_end;
    }

    /// Return a description string.
    std::string get_description() const;

    /** Read the number of entries and the collection frequency.
     *
     *  @param posptr                 Pointer to the current read position.
     *  @param end                    End of the data to read from.
     *  @param number_of_entries_ptr  Destination for the number of entries.
     *  @param collection_freq_ptr    Destination for the collection
     *                                frequency.
     *
     *  NOTE(review): whether @a posptr is advanced, and whether the output
     *  pointers may be NULL, is not visible here - check the implementation.
     */
    static void read_number_of_entries(const char ** posptr,
                                       const char * end,
                                       Xapian::doccount * number_of_entries_ptr,
                                       Xapian::termcount * collection_freq_ptr);
};
#endif /* OM_HGUARD_GLASS_POSTLIST_H */
|