1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
/*
** SWISH++
** file_list.c
**
** Copyright (C) 1998 Paul J. Lucas
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
// local
#include "enc_int.h"
#include "file_list.h"
#include "word_markers.h"
//
// Sentinel byte for file_list::const_iterator.  operator++() stores the address
// of this object in c_ to mark the "just hit end" state and later compares c_
// against that address; the stored value itself is not examined in this file.
//
file_list::byte const file_list::const_iterator::end_value = 0;
//*****************************************************************************
//
// SYNOPSIS
//
file_list::size_type file_list::calc_size() const
//
// DESCRIPTION
//
//      Calculate the size of the file list (the number of files the word is
//      in) and cache the result in size_.
//
//      The encoded list starting at ptr_ is walked one file entry at a time.
//      Every entry starts with three encoded numbers (file index, occurrences
//      and rank) and is followed by markers: a list marker introduces a run of
//      encoded numbers ended by Stop_Marker, Word_Entry_Continues_Marker means
//      another file entry follows, and a Stop_Marker met where a marker is
//      expected ends the whole file list.
//
// RETURN VALUE
//
//      Returns the size.
//
//*****************************************************************************
{
    //
    // It would be nice if there were a way to calculate the size of the file
    // list other than by just marching through it.  Since this should be as
    // fast as possible, a much simplified version of the dec_int() code has
    // been inlined here by hand -- a few times.  (We also don't care what the
    // actual numbers are, so there's no point in computing them, so we save
    // having to do two shifts, and logical or for each file.)
    //
    // The running count is kept in a local variable and stored into size_ only
    // once, right before returning, so the loop does not write to a data
    // member for every file.  The pointer is deliberately not declared
    // "register": that specifier was removed from the language in C++17.
    //
    size_type count = 0;
    byte const *p = ptr_;

    while ( true ) {
        ++count;

        //
        // Skipping a number means stepping over every byte that has its high
        // bit set plus the one byte after them that does not.
        //
        while ( *p++ & 0x80 ) ;                 // skip file index
        while ( *p++ & 0x80 ) ;                 // skip occurrences
        while ( *p++ & 0x80 ) ;                 // skip rank

        bool more_lists = true;
        while ( more_lists ) {
            //
            // At this point, p must be pointing to a marker.
            //
            switch ( *p++ ) {                   // skip marker
                case Stop_Marker:
                    // End of the file list: cache and return the count.
                    size_ = count;
                    return size_;
                case Word_Entry_Continues_Marker:
                    // Another file entry follows: go count it.
                    more_lists = false;
                    break;
                default:                        // must be a list marker
                    // Skip every number in the list, then its Stop_Marker.
                    while ( *p != Stop_Marker )
                        while ( *p++ & 0x80 ) ;
                    ++p;
            }
        }
    }
}
//*****************************************************************************
//
// SYNOPSIS
//
file_list::const_iterator& file_list::const_iterator::operator++()
//
// DESCRIPTION
//
//      Move a file_list::const_iterator forward by one file entry, decoding
//      that entry into v_.
//
// RETURN VALUE
//
//      Returns a reference to the iterator itself, as iterators conventionally
//      do.
//
// SEE ALSO
//
//      index.c     write_full_index() for a description of the index file
//                  format.
//
//*****************************************************************************
{
    //
    // Two states mean there is nothing left to decode: c_ is null ("already
    // at end"), or c_ points at end_value ("just hit end" on the previous
    // call).  Either way the iterator ends up in the "already at end" state.
    //
    bool const nothing_left = c_ == 0 || c_ == &end_value;
    if ( nothing_left ) {
        c_ = 0;
        return *this;
    }

    //
    // The fixed part of an entry: three encoded integers, in this order.
    //
    v_.index_       = dec_int( c_ );
    v_.occurrences_ = dec_int( c_ );
    v_.rank_        = dec_int( c_ );

    //
    // Discard whatever the previously decoded entry left in the lists.
    //
    if ( !v_.meta_ids_.empty() )
        v_.meta_ids_.clear();
#ifdef FEATURE_word_pos
    if ( !v_.pos_deltas_.empty() )
        v_.pos_deltas_.clear();
    else
        v_.pos_deltas_.reserve( v_.occurrences_ );
#endif

    //
    // The variable part of an entry: zero or more lists, each introduced by
    // its own marker and closed by a Stop_Marker.  The loop is left only via
    // one of the two returns below.
    //
    for ( ;; ) {
        //
        // c_ is expected to point at a marker here; read it and step past it.
        //
        switch ( *c_++ ) {

            case Word_Entry_Continues_Marker:
                //
                // This entry is complete and another one follows it.
                //
                return *this;

            case Stop_Marker:
                //
                // This entry is complete and it was the last one: switch to
                // the "just hit end" state.
                //
                c_ = &end_value;
                return *this;

            case Meta_Name_List_Marker:
                while ( *c_ != Stop_Marker ) {
                    v_.meta_ids_.insert( dec_int( c_ ) );
                }
                break;

#ifdef FEATURE_word_pos
            case Word_Pos_List_Marker:
                while ( *c_ != Stop_Marker ) {
                    v_.pos_deltas_.push_back( dec_int( c_ ) );
                }
                break;
#endif

            default:
                //
                // A list marker this code does not recognize, possibly from a
                // future index file format with new list types.  Nothing can
                // be done with its contents, so decode and discard every
                // number in it.
                //
                while ( *c_ != Stop_Marker ) {
                    dec_int( c_ );
                }
        }

        ++c_;                                   // step over the list's Stop_Marker
    }
}
/* vim:set et sw=4 ts=4: */
|