1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
|
#ifndef __INDEX_H__ /* file wrapper */
#define __INDEX_H__
/*
* Jeffrey Friedl
* Omron Corporation
* Nagaokakyoshi, Japan 617
*
* jfriedl@nff.ncl.omron.co.jp
*
* This work is placed under the terms of the GNU General Public License
* (the "GNU Copyleft").
*/
#ifndef __PACKED_LIST_H__
# ifndef __SYSTEM_H__
# include "system.h"
# endif
# if defined(_HAVE_SYS_STDTYPES_H_)
# include <sys/stdtypes.h>
# else
# include <sys/types.h>
# endif
#include "packed_list.h"
#endif
#include "virtfile.h"
/*
 * Identification values for an index.  The comments on the magic,
 * version_major and version_minor fields of struct index name these
 * three constants as their expected contents.
 */
#define INDEX_MAGIC 0x6a647800 /* "jdx\0" */
#define INDEX_VERSION_MAJOR 1
#define INDEX_VERSION_MINOR 2
/*
* Jeffrey Friedl, Omron Corporation.
* jfriedl@nff.ncl.omron.co.jp
* October 1993
*
* Conceptually, following the index for a character will yield the lines
* in the text file which contain that character.
*
* One more level closer to reality (how it's actually implemented), this
* is returned as an array of pointers (to each line) and a count indicating
* how many elements in the array (the array is not null-terminated).
*
* One more level closer, the array elements aren't really pointers, but
* offsets from the start of the file (or from the start of the memory
* into which the file's been loaded).
*
* One more level closer, each element doesn't actually hold the offset into
* the file, but the _difference_ from the previous offset (the first
* element actually holding the real offset into the file, as the "previous"
* offset of the first is zero).
*
* One more level closer, it's really not an array of values (differences)
* but the appropriate number of sequential sets of value-indicating bytes.
* These are implemented via "packed_list.h".
*/
/*
 * IndexOffset - distance in bytes from the beginning of the index
 * (file or allocated memory) to some location inside that index.
 * Values of this type are stored in the index itself.
 */
typedef unsigned long IndexOffset;

/*
 * TextOffset - distance in bytes from the start of the text (file or
 * memory) to some location in the text (i.e. the start of a line).
 * Like IndexOffset, it is only stored inside the index.
 */
typedef unsigned long TextOffset;

/*
 * Convert an in-memory pointer into the matching xxxxOffset type.
 * Both pointers are viewed as byte pointers, so the result is the
 * number of bytes between the start of the region and the pointer.
 * Each argument is evaluated exactly once.
 */
#define makeIndexOffset(index_start, index_ptr)                  \
    ((IndexOffset)((const unsigned char *)(index_ptr) -          \
                   (const unsigned char *)(index_start)))

#define makeTextOffset(text_start, text_ptr)                     \
    ((TextOffset)((const unsigned char *)(text_ptr) -            \
                  (const unsigned char *)(text_start)))
/*
 * The inverse conversion: given BASE (a pointer to the start of a
 * region) and OFFSET (a byte count from that start), produce a pointer
 * of the requested TYPE.  The arithmetic is done on byte pointers, so
 * OFFSET is always counted in bytes regardless of TYPE.
 */
#define realptr(start, delta, ptr_type) \
    ((ptr_type)((delta) + (const unsigned char *)(start)))
/*
* EUC Japanese are double-byte characters, each with the high bit set.
* When we find one of these, we look at it as a HI byte and a LO byte,
* with the high bits cleared (thereby setting their possible range from
* [0x80 - 0xff] to [0x00 - 0x7f], which is a bit more convenient to
* work with). For "regular" characters, we use HI=0, LO="regular byte".
*
* Basically, we would like to be able to do something like
* ListOfLinesForOneCharacter = index[HI][LO]
* but that would require a larger array than we want, since well over half
* of the possible codes aren't used.
*
* So I compromise a bit... I keep the [HI] part (all 128 slots), but each
* of those 128 slots is, rather than 128 slots for [LO], some variable
* number of slots along with info indicating how many are there.
*
* If, for example, the lowest LO for [HI=123] is 10 and the highest LO
* used is 20, I would keep 11 slots with first_lo of 10 and end_lo of 21
* (end_lo is one past the highest LO used: the accessors below reject
* any lo >= end_lo).
*
* The ListOfLinesForOneCharacter is logically an array of pointers,
* so my index structure might look something like
*
* struct
* {
* int first_lo, end_lo;
* unsigned char *lists_of_lines[]; <--- indexed by (LO-first_lo)
* unsigned list_counts[]; <--- indexed by (LO-first_lo)
* } hi[128];
*
* But rather than use "char *foo[]", I use IndexOffset so that it can make
* sense in core or on disk.
*/
/*
 * Header of an index.  The fixed-size part below is followed directly by
 * the variable-size tables, which are located through the IndexOffset
 * values kept in hi[].  Offsets are applied relative to the address of
 * this structure by the in-memory accessors (get_index_count,
 * get_index_list) and used as seek positions in FileP by the mem_*
 * accessors.
 */
struct index
{
unsigned magic; /* INDEX_MAGIC */
unsigned short version_major; /* INDEX_VERSION_MAJOR */
unsigned short version_minor; /* INDEX_VERSION_MINOR */
unsigned indexsize; /* size of complete index, including this header */
unsigned linecount; /* FYI, number of lines in the file indexed */
unsigned limitcount; /* FYI, if char on this many lines, not in index */
/* stream that mem_get_index_count/mem_get_index_list fseek and fread */
FILE *FileP;
/* true when the index I carries a non-NULL FileP */
#define IsMemIndex(I) ((I)->FileP != NULL)
time_t st__mtime; /* st_mtime of file indexed */
/* the real index */
struct
{
/* lowest LO with a slot; accessors reject lo < first_lo */
unsigned char first_lo;
/* accessors reject lo >= end_lo */
unsigned char end_lo;
/* offset of an elementcount array indexed by (lo - first_lo) */
IndexOffset listcount;
/*
 * offset of an IndexOffset array indexed by (lo - first_lo); each
 * entry is in turn the offset of that character's list
 */
IndexOffset shifted_lo;
} hi[128];
/* the rest of the index follows... (indexsize-sizeof(index)) bytes */
};
/*
 * elementcount - the integer type used for the per-character counts
 * stored in the index.  It is an unsigned short unless the build
 * defines USE_SHORT_INDEX_COUNTS as 0, in which case it is a plain
 * unsigned.
 */
#if !defined(USE_SHORT_INDEX_COUNTS)
#  define USE_SHORT_INDEX_COUNTS 1
#endif

#if !USE_SHORT_INDEX_COUNTS
typedef unsigned elementcount;
#else
typedef unsigned short elementcount;
#endif

/*
 * SKIPPED_COUNT is the all-ones value of elementcount and MAX_COUNT is
 * the value just below it.
 * NOTE(review): presumably SKIPPED_COUNT marks a character that was left
 * out of the index -- confirm against index.c.
 */
#define SKIPPED_COUNT ((elementcount)~0)
#define MAX_COUNT (SKIPPED_COUNT - 1)
/*
 * __volatile__ and __inline__ are GNU C spellings.  When the compiler
 * is not GNU C, define them to nothing (unless something already
 * defined them) so that declarations using them still compile.
 */
#ifndef __GNUC__
#  ifndef __volatile__
#    define __volatile__ /* nothing; for use with volatile functions */
#  endif
#  ifndef __inline__
#    define __inline__ /* nothing; for use with inline functions */
#  endif
#endif
/*
 * More or less does the virtual
 *      dest = index[hi].list_of_counts[lo]
 * by reading the count straight from memory: the count table is located
 * at byte offset i->hi[hi].listcount from the address of the index
 * header I, and is indexed by (lo - first_lo).
 *
 *   i    - index header; the tables must be addressable relative to it.
 *   hi   - slot of i->hi[] to consult.
 *   lo   - low byte; accepted when first_lo <= lo < end_lo for that slot.
 *   dest - receives the count on success, is left untouched otherwise.
 *
 * Returns 1 if the count was fetched, 0 if there is no such entry
 * (HI does not name a slot of hi[], or LO is outside the slot's range).
 */
static __inline__ int
get_index_count(const struct index *i, unsigned char hi,
                unsigned char lo, elementcount *dest)
{
    const elementcount *counts;

    /* hi is a full byte but hi[] has fewer slots: never read past it */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
        return 0; /* no such character in the index */

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
        return 0; /* no such character in the index */

    counts = realptr(i, i->hi[hi].listcount, const elementcount *);
    *dest = counts[lo - i->hi[hi].first_lo];
    return 1;
}
/*
 * More or less does the virtual
 *      dest = index[hi].list_of_counts[lo]
 * but fetches the count from the stream i->FileP instead of from memory:
 * it seeks to byte position
 *      i->hi[hi].listcount + (lo - first_lo) * sizeof(elementcount)
 * and reads one elementcount from there.
 *
 *   i    - index header; i->FileP must be an open, seekable stream.
 *   hi   - slot of i->hi[] to consult.
 *   lo   - low byte; accepted when first_lo <= lo < end_lo for that slot.
 *   dest - receives the count on success.
 *
 * Returns 1 if the count was read, 0 if there is no such entry (HI does
 * not name a slot of hi[], or LO is outside the slot's range).  A failed
 * seek or read is reported through die(), which is declared elsewhere.
 */
static __inline__ int
mem_get_index_count(const struct index *i,
                    unsigned char hi,
                    unsigned char lo,
                    elementcount *dest)
{
    long loc;
    long rc;

    /* hi is a full byte but hi[] has fewer slots: never read past it */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
        return 0; /* no such character in the index */

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
        return 0; /* no such character in the index */

    /* plain integer arithmetic; no pointer is formed from a null base */
    loc = (long)(i->hi[hi].listcount +
                 (IndexOffset)(lo - i->hi[hi].first_lo) * sizeof(elementcount));

    /* keep the result so the "returned %ld" conversion has its argument */
    rc = fseek(i->FileP, loc, SEEK_SET);
    if (rc != 0)
        die("bad fseek to %ld (fp=%x) at %s line %d, returned %ld: %n\n",
            loc, i->FileP, __FILE__, __LINE__, rc);

    /* a short read would leave *dest unset, so it must not pass silently */
    if (fread(dest, sizeof(elementcount), 1, i->FileP) != 1)
        die("bad read from fp=%x at %s line %d: %n\n",
            i->FileP, __FILE__, __LINE__);
    return 1;
}
static __inline__ int
get_index_list(const struct index *i,
unsigned char hi,
unsigned char lo,
const unsigned char **dest)
{
/* make sure that the LO falls into the range of lo's for the HI */
if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
return 0; /* no such character in the index */
else {
IndexOffset listoffset =
realptr(i, i->hi[hi].shifted_lo, IndexOffset *)
[lo - i->hi[hi].first_lo];
*dest = realptr(i, listoffset, unsigned char *);
return 1;
}
}
/*
 * More or less does the virtual
 *      dest = offset_of(index[hi].lists_of_lines[lo])
 * but fetches the value from the stream i->FileP instead of from memory:
 * it seeks to byte position
 *      i->hi[hi].shifted_lo + (lo - first_lo) * sizeof(IndexOffset)
 * and reads one IndexOffset from there.  Unlike get_index_list, the
 * result is the stored offset itself, not a pointer.
 *
 *   i    - index header; i->FileP must be an open, seekable stream.
 *   hi   - slot of i->hi[] to consult.
 *   lo   - low byte; accepted when first_lo <= lo < end_lo for that slot.
 *   dest - receives the list's IndexOffset on success.
 *
 * Returns 1 if the offset was read, 0 if there is no such entry (HI does
 * not name a slot of hi[], or LO is outside the slot's range).  A failed
 * seek or read is reported through die(), which is declared elsewhere.
 */
static __inline__ int
mem_get_index_list(const struct index *i,
                   unsigned char hi,
                   unsigned char lo,
                   IndexOffset *dest)
{
    long loc;
    long rc;

    /* hi is a full byte but hi[] has fewer slots: never read past it */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
        return 0; /* no such character in the index */

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
        return 0; /* no such character in the index */

    /* plain integer arithmetic; no pointer is formed from a null base */
    loc = (long)(i->hi[hi].shifted_lo +
                 (IndexOffset)(lo - i->hi[hi].first_lo) * sizeof(IndexOffset));

    /* keep the result so the "returned %ld" conversion has its argument */
    rc = fseek(i->FileP, loc, SEEK_SET);
    if (rc != 0)
        die("bad fseek to %ld (fp=%x) at %s line %d, returned %ld: %n\n",
            loc, i->FileP, __FILE__, __LINE__, rc);

    if (fread(dest, sizeof(*dest), 1, i->FileP) != 1)
        die("bad read from fp=%x at %s line %d: %n\n",
            i->FileP, __FILE__, __LINE__);
    return 1;
}
/*
 * Flag bits.  These must be distinct from those in loadfile.h.
 * NOTE(review): presumably these are the values accepted by the `flags'
 * parameters of the routines declared below -- confirm against index.c.
 */
#define INDEX_REPORT_PROGRESS 0x00000001
#define INDEX_REPORT_SKIPPED 0x00000002
#define INDEX_REPORT_STATS 0x00000004

/*
 * Routines declared here and defined in index.c.
 */
extern struct index *create_index(VirtFile *v, unsigned percent,
                                  unsigned flags);
extern int is_index_file(const char *filename);
extern struct index *read_index_file(const char *filename, int try,
                                     unsigned flags);
extern struct index *mem_read_index_file(const char *filename);
extern int write_index_file(const char *filename, const struct index *i);
#undef __inline__
#endif /* file wrapper */
|