File: index.h

package info (click to toggle)
lookup 1.08b-15
  • links: PTS
  • area: main
  • in suites: sid
  • size: 1,784 kB
  • sloc: ansic: 12,638; makefile: 247; perl: 174; sh: 53
file content (279 lines) | stat: -rw-r--r-- 9,027 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#ifndef __INDEX_H__ /* file wrapper */
#define __INDEX_H__
/*
 * Jeffrey Friedl
 * Omron Corporation
 * Nagaokakyoshi, Japan 617
 *
 * jfriedl@nff.ncl.omron.co.jp
 *
 * This work is placed under the terms of the GNU General Public License
 * (the "GNU Copyleft").
 */

#ifndef __PACKED_LIST_H__
# ifndef __SYSTEM_H__
#   include "system.h"
# endif
# if defined(_HAVE_SYS_STDTYPES_H_)
#  include <sys/stdtypes.h>
# else
#  include <sys/types.h>
# endif
#include "packed_list.h"
#endif
#include "virtfile.h"

/* Values expected in the `magic' and `version_xxx' fields of struct index. */
#define INDEX_MAGIC          0x6a647800  /* the bytes 'j' 'd' 'x' '\0' */
#define INDEX_VERSION_MAJOR  1
#define INDEX_VERSION_MINOR  2

/*
 * Jeffrey Friedl, Omron Corporation.
 * jfriedl@nff.ncl.omron.co.jp
 * October 1993
 *
 * Conceptually, following the index for a character will yield the lines
 * in the text file which contain that character.
 * 
 * One more level closer to reality (how it's actually implemented), this
 * is returned as an array of pointers (to each line) and a count indicating
 * how many elements in the array (the array is not null-terminated).
 *
 * One more level closer, the array elements aren't really pointers, but
 * offsets from the start of the file (or from the start of the memory
 * into which the file's been loaded).
 *
 * One more level closer, each element doesn't actually hold the offset into
 * the file, but the _difference_ from the previous offset (the first
 * element actually holding the real offset into the file, as the "previous"
 * offset of the first is zero).
 *
 * One more level closer, it's really not an array of values (differences)
 * but the appropriate number of sequential sets of value-indicating bytes.
 * These are implemented via "packed_list.h".
 */


/*
 * IndexOffset - distance in bytes from the start of the index (be that a
 * file or a block of allocated memory) to some location inside the index.
 * Values of this type are stored in the index itself.
 */
typedef unsigned long IndexOffset;

/*
 * TextOffset - distance in bytes from the start of the text (file or
 * memory) to some location inside the text, such as the start of a line.
 * These, too, are found only in the index itself.
 */
typedef unsigned long TextOffset;

/*
 * Turn a pointer into in-memory index (or text) data into the matching
 * xxxxOffset, given a pointer to the start of that data.  Both pointers
 * are treated as byte pointers, so the result is a byte count.
 */
#define makeIndexOffset(index_start, index_ptr)                        \
    ((IndexOffset)((const unsigned char *)(index_ptr) -                \
                   (const unsigned char *)(index_start)))

#define makeTextOffset(text_start, text_ptr)                           \
    ((TextOffset)((const unsigned char *)(text_ptr) -                  \
                  (const unsigned char *)(text_start)))

/*
 * The reverse direction: advance the pointer BASE by OFFSET bytes and
 * cast the result to TYPE (which is expected to be a pointer type).
 */
#define realptr(base, offset, type)                                    \
    ((type)(((const unsigned char *)(base)) + (offset)))


/*
 * EUC Japanese are double-byte characters, each with the high bit set.
 * When we find one of these, we look at it as a HI byte and a LO byte,
 * with the high bits cleared (thereby setting their possible range from
 * [0x80 - 0xff]  to  [0x00 - 0x7f], which is a bit more convenient to
 * work with).  For "regular" characters, we use HI=0, LO="regular byte".
 *
 * Basically, we would like to be able to do something like
 *     ListOfLinesForOneCharacter = index[HI][LO]
 * but that would require a larger array than we want, since well over half
 * of the possible codes aren't used.
 *
 * So I compromise a bit... I keep the [HI] part (all 128 slots), but each
 * of those 128 slots is, rather than 128 slots for [LO], some variable
 * number of slots along with info indicating how many are there.
 *
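 * If, for example, the lowest LO for [HI=123] is 10 and the highest LO
 * used is 20, I would keep 11 slots with first_lo of 10 and end_lo of 21
 * (end_lo is one past the highest LO: the lookup routines below treat
 * any lo >= end_lo as not being in the index).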
 *
 * The ListOfLinesForOneCharacter is logically an array of pointers,
 * so my index structure might look something like
 *
 *   struct
 *   {
 *         int first_lo, end_lo;
 *         unsigned char *lists_of_lines[];  <--- indexed by (LO-first_lo)
 *         unsigned list_counts[];           <--- indexed by (LO-first_lo)
 *   } hi[128];
 *
 * But rather than use "char *foo[]", I use IndexOffset so that it can make
 * sense in core or on disk.
 */
/*
 * Header of an index.  The fixed-size part declared here is followed
 * directly by the variable-size index data; the hi[] entries locate that
 * data by byte offset from the start of this structure (see realptr()).
 */
struct index
{
    unsigned magic;                 /* INDEX_MAGIC */
    unsigned short version_major;   /* INDEX_VERSION_MAJOR */
    unsigned short version_minor;   /* INDEX_VERSION_MINOR */

    unsigned indexsize;	  /* size of complete index, including this header */
    unsigned linecount;   /* FYI, number of lines in the file indexed */
    unsigned limitcount;  /* FYI, if char on this many lines, not in index */
    /*
     * Stream that the mem_get_index_xxx() routines fseek()/fread() the
     * index data from, instead of dereferencing data held after this
     * header in memory.
     */
    FILE *FileP;

    /* True when index I has such a stream attached (FileP is non-NULL). */
    #define IsMemIndex(I)   ((I)->FileP != NULL)
       
    time_t   st__mtime;	  /* st_mtime of file indexed */

    /* the real index */
    /*
     * One slot per HI byte.  For a slot, LO values in the half-open range
     * [first_lo, end_lo) have entries; the entry for a given LO sits at
     * position (LO - first_lo) of each of the two arrays below.
     */
    struct
    {
	unsigned char first_lo;   /* lowest LO that has an entry */
	unsigned char end_lo;     /* first LO past the entries (exclusive) */
	IndexOffset listcount;    /* offset of the array of elementcount */
	IndexOffset shifted_lo;   /* offset of the array of IndexOffset, each
				     the offset of one character's list */
    } hi[128];
    /* the rest of the index follows... (indexsize-sizeof(index)) bytes */
};

/*
 * elementcount - type of the per-character counts kept in the index.
 * Unless USE_SHORT_INDEX_COUNTS is predefined as 0, the short (unsigned
 * short) form is used.
 */
#if !defined(USE_SHORT_INDEX_COUNTS)
#  define USE_SHORT_INDEX_COUNTS 1
#endif

#if !USE_SHORT_INDEX_COUNTS
typedef unsigned elementcount;
#else
typedef unsigned short elementcount;
#endif

/* The all-ones count is set aside as SKIPPED_COUNT... */
#define SKIPPED_COUNT ((elementcount)~0)
/* ...leaving one less than that as the largest count available. */
#define MAX_COUNT (SKIPPED_COUNT - 1)

/*
 * __volatile__ and __inline__ are GCC spellings.  When not compiling
 * with GCC, make them expand to nothing unless they already exist as
 * macros.
 */
#ifndef __GNUC__
#  ifndef __volatile__
#    define __volatile__ /* nothing; for use with volatile functions */
#  endif
#  ifndef __inline__
#    define __inline__ /* nothing; for use with inline functions */
#  endif
#endif

/*
 * For an index held in memory, more or less does the virtual
 *     *dest = index[hi].list_of_counts[lo]
 *
 *   i    - the index; the count arrays are read from memory at offsets
 *          relative to this pointer.
 *   hi   - HI byte of the character (0 for "regular" characters).
 *   lo   - LO byte of the character.
 *   dest - receives the count on success; left untouched otherwise.
 *
 * Returns true (1) if the count could be gotten, false (0) if there was
 * no such info entered (i.e. if HI and/or LO were bad).
 */
static __inline__ int
get_index_count(const struct index *i, unsigned char hi,
		unsigned char lo, elementcount *dest)
{
    const elementcount *counts;

    /* HI picks one of the hi[] slots; anything beyond the table is bad */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
	return 0;

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
	return 0; /* no such character in the index */

    counts = realptr(i, i->hi[hi].listcount, const elementcount *);
    *dest = counts[lo - i->hi[hi].first_lo];
    return 1;
}


/*
 * Same job as get_index_count(), i.e. more or less the virtual
 *     *dest = index[hi].list_of_counts[lo]
 * but the count is read from the stream i->FileP (at the entry's offset
 * from the start of the stream) rather than from memory following I.
 *
 *   i    - the index header; i->FileP must be an open stream.
 *   hi   - HI byte of the character (0 for "regular" characters).
 *   lo   - LO byte of the character.
 *   dest - receives the count on success.
 *
 * Returns true (1) if the count was read, false (0) if there was no such
 * info entered (i.e. if HI and/or LO were bad).  A failed seek or read is
 * reported through die().
 */
static __inline__ int
mem_get_index_count(const struct index *i,
		    unsigned char hi,
		    unsigned char lo,
		    elementcount *dest)
{
    long loc;
    int seek_result;

    /* HI picks one of the hi[] slots; anything beyond the table is bad */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
	return 0;

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
	return 0; /* no such character in the index */

    /* position in the file of element (lo - first_lo) of the count array */
    loc = (long)(i->hi[hi].listcount +
		 (lo - i->hi[hi].first_lo) * sizeof(elementcount));

    /*
     * NOTE(review): fp is handed to a %x conversion and %n is given no
     * argument; both presumably rely on die()'s own format handling --
     * confirm against die().
     */
    seek_result = fseek(i->FileP, loc, SEEK_SET);
    if (seek_result != 0)
	die("bad fseek to %ld (fp=%x) at %s line %d, returned %ld: %n\n",
	    loc, i->FileP, __FILE__, __LINE__, (long)seek_result);

    /* a short read would otherwise hand back garbage as a valid count */
    if (fread(dest, sizeof(elementcount), 1, i->FileP) != 1)
	die("bad read from fp=%x at %s line %d: %n\n",
	    i->FileP, __FILE__, __LINE__);
    return 1;
}


static __inline__ int
get_index_list(const struct index *i,
	       unsigned char hi,
	       unsigned char lo,
	       const unsigned char **dest)
{
    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
	return 0; /* no such character in the index */
    else {
        IndexOffset listoffset =
	    realptr(i, i->hi[hi].shifted_lo, IndexOffset *)
		[lo - i->hi[hi].first_lo];
	*dest = realptr(i, listoffset, unsigned char *);
	return 1;
    }
}

/*
 * Counterpart of get_index_list() that reads from the stream i->FileP
 * rather than from memory following I.  It does not produce a pointer:
 * *dest receives the IndexOffset stored for the character, i.e. the
 * offset of the character's list from the start of the index.
 *
 *   i    - the index header; i->FileP must be an open stream.
 *   hi   - HI byte of the character (0 for "regular" characters).
 *   lo   - LO byte of the character.
 *   dest - receives the list's offset on success.
 *
 * Returns true (1) if the offset was read, false (0) if there was no
 * such info entered (i.e. if HI and/or LO were bad).  A failed seek or
 * read is reported through die().
 */
static __inline__ int
mem_get_index_list(const struct index *i,
		   unsigned char hi,
		   unsigned char lo,
		   IndexOffset *dest)
{
    long loc;
    int seek_result;

    /* HI picks one of the hi[] slots; anything beyond the table is bad */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
	return 0;

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
	return 0; /* no such character in the index */

    /* position in the file of element (lo - first_lo) of the offset array */
    loc = (long)(i->hi[hi].shifted_lo +
		 (lo - i->hi[hi].first_lo) * sizeof(IndexOffset));

    /*
     * NOTE(review): fp is handed to a %x conversion and %n is given no
     * argument; both presumably rely on die()'s own format handling --
     * confirm against die().
     */
    seek_result = fseek(i->FileP, loc, SEEK_SET);
    if (seek_result != 0)
	die("bad fseek to %ld (fp=%x) at %s line %d, returned %ld: %n\n",
	    loc, i->FileP, __FILE__, __LINE__, (long)seek_result);

    if (fread(dest, sizeof(*dest), 1, i->FileP) != 1)
	die("bad read from fp=%x at %s line %d: %n\n",
	    i->FileP, __FILE__, __LINE__);
    return 1;
}

/*
 * Defined in index.c.
 * NOTE(review): presumably builds and returns an index for the text held
 * by V; PERCENT and FLAGS are not described in this header (FLAGS is
 * likely a combination of the INDEX_REPORT_xxx bits) -- confirm against
 * index.c.
 */
extern struct index *create_index(VirtFile *v, unsigned percent, unsigned flags);


/*
 * Single-bit INDEX_REPORT_xxx flags.
 * These flags must be distinct from those in loadfile.h
 */
#define INDEX_REPORT_PROGRESS  0x01
#define INDEX_REPORT_SKIPPED   0x02
#define INDEX_REPORT_STATS     0x04

/*
 * Index-file routines; their definitions are not part of this header.
 * NOTE(review): the summaries below are inferred from names and
 * signatures only -- confirm against the implementation.
 *   is_index_file       - presumably tests whether FILENAME is an index.
 *   read_index_file     - presumably loads the index stored in FILENAME.
 *   mem_read_index_file - presumably opens FILENAME for use through the
 *                         mem_get_index_xxx() routines.
 *   write_index_file    - presumably writes index I out to FILENAME.
 */
extern int is_index_file(const char *filename);
extern struct index *read_index_file(const char *filename, int try,
				     unsigned flags);
extern struct index *mem_read_index_file(const char *filename);
extern int write_index_file(const char *filename, const struct index *i);

#undef __inline__

#endif /* file wrapper */