File: quick.c

package info (click to toggle)
glimpse 4.1-1
  • links: PTS
  • area: non-free
  • in suites: hamm
  • size: 2,344 kB
  • ctags: 2,254
  • sloc: ansic: 32,194; makefile: 561; sh: 170; perl: 142
file content (232 lines) | stat: -rw-r--r-- 7,741 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/* Copyright (c) 1994 Burra Gopal, Udi Manber.  All Rights Reserved. */

/*
 * quick.c:	Used to search for a pattern in a compressed file.
 *
 * Algorithm: if the file (or stdin) is a compressed file, then:
 * +  a. Read in the hash-table-index file.
 * +  b. For each page in which the words of the pattern can be found:
 *	 build the hash-table using the words in exactly those pages.
 * +  c. Now, call compress with the given pattern.
 *
 * +  d. Call the normal search routines with the compressed pattern on
 *	 the input file.
 * +  e. If the option is to count number of matches, just exit.
 *	 Otherwise we have to modify the r_output/output routines:
 *
 * +  f. Read in the string-table-index file.
 * +  g. For each page in which the word numbers of the input file can
 *	 be found: build the string-table using the words in exactly
 *	 those pages.
 * +  h. Call uncompress with the input file line to be output and
 *	 output THIS line instead of the original matched line.
 *
 * Part of this will be in agrep and part of this here.
 */

#include "defs.h"
#include <sys/types.h>
#include <sys/stat.h>

/*
 * The quick-functions can be called any number of times -- they
 * (re)open the hash, string and freq files only on the first call
 * or when a different file name is passed in.
 */

/*
 * State used by quick_tcompress() and handed to build_partial_hash().
 */
hash_entry *compress_hash_table[HASH_TABLE_SIZE];	/* used for compress: relies on C zero-initializing file-scope objects */
char	loaded_hash_table[HASH_FILE_BLOCKS];		/* marks which pages of the hash-table are already loaded: chars are stored (rather than packed bits) since it is just 4K and speed is most important */
char	*hashindexbuf;	/* malloc'd copy of the "<hash_file>.index" contents that follow its leading blocksize number; filled in by quick_tcompress() */
int	hashindexsize;	/* size that goes with hashindexbuf (bytes read from the index file + 1); set by quick_tcompress() */

/*
 * quick_tcompress: compress a pattern, loading only those pages of the
 * hash table that the words of the pattern need.
 *
 * freq_file, hash_file: names of the frequency and hash (dictionary) files.
 *	They are (re)opened on the first call, after a failed call, or when
 *	either name differs from the one used by the previous successful
 *	initialization; otherwise the already open hash file is just rewound.
 *	Each name must be shorter than MAX_LINE_LEN.
 * pattern, len: the pattern to compress; a trailing '\0' that is counted
 *	in len is dropped before compressing.
 * newpattern, maxnewlen: passed through to tcompress().
 * flags: passed to initialize_common() and tcompress(); error messages are
 *	printed on stderr only when TC_ERRORMSGS is set in it.
 *
 * Returns the value returned by tcompress() (the length of the compressed
 * pattern filled into the user-supplied newpattern buffer), or 0 if the
 * initialization fails.
 */
int
quick_tcompress(freq_file, hash_file, pattern, len, newpattern, maxnewlen, flags)
	char	*freq_file;
	char	*hash_file;
	CHAR	*pattern;
	int	len;
	void	*newpattern;	/* can be FILE* or CHAR* */
	int	*maxnewlen;
	int	flags;
{
	static FILE	*hashfp = NULL, *hashindexfp = NULL;
	static char	old_freq_file[MAX_LINE_LEN] = "", old_hash_file[MAX_LINE_LEN] = "";
	static int	blocksize;
	int		newlen;

	if ((hashfp == NULL) || (strcmp(freq_file, old_freq_file) != 0) || (strcmp(hash_file, old_hash_file) != 0))
	{	/* Have to do some initializations */
		char	s[MAX_LINE_LEN + sizeof(".index")];
		struct stat statbuf;
		int	nread;

		/*
		 * The names are remembered in fixed-size static buffers and
		 * hash_file is also used to build the index-file name in s[]:
		 * refuse names that would overflow them.
		 */
		if ((strlen(freq_file) >= sizeof(old_freq_file)) || (strlen(hash_file) >= sizeof(old_hash_file))) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "quick_tcompress: file name too long\n");
			return 0;
		}

		if (hashfp != NULL) {
			/*
			 * NOTE(review): the previous hashindexbuf is not freed here since
			 * it is not visible from this file whether uninitialize_tcompress()
			 * already releases it -- confirm, otherwise it leaks on every switch
			 * of dictionary.
			 */
			uninitialize_tcompress();
			fclose(hashfp);
			hashfp = NULL;
		}
		else memset(loaded_hash_table, '\0', HASH_FILE_BLOCKS);
		if (!initialize_common(freq_file, flags)) return 0;	/* don't call initialize_tcompress since that will load the FULL hash table */

		if ((hashfp = fopen(hash_file, "r")) == NULL) {
			if (flags & TC_ERRORMSGS) {
				fprintf(stderr, "cannot open cast-dictionary file: %s\n", hash_file);
				fprintf(stderr, "(use -H to give a dictionary-dir or run 'buildcast' to make a dictionary)\n");
			}
			return 0;
		}

		sprintf(s, "%s.index", hash_file);	/* cannot overflow: length of hash_file was checked above */
		if ((hashindexfp = fopen(s, "r")) == NULL) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "cannot open for reading: %s\n", s);
			goto fail;
		}

		/* The index file starts with the blocksize: fall back to the default if it is missing or 0 */
		blocksize = 0;
		if ((fscanf(hashindexfp, "%d\n", &blocksize) != 1) || (blocksize == 0)) blocksize = DEF_BLOCKSIZE;

		if (fstat(fileno(hashindexfp), &statbuf) == -1) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "error in quick_tcompress/fstat on '%s.index'\n", hash_file);
			goto fail;
		}

		/*
		 * Up to st_size bytes can be read (when fscanf above consumed nothing)
		 * and two more bytes are written after them: hence the + 2.
		 */
		if ((hashindexbuf = (char *)malloc((size_t)statbuf.st_size + 2)) == NULL) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "quick_tcompress: malloc failure!\n");
			goto fail;
		}

		/* fread never returns -1: a read error has to be detected with ferror */
		nread = (int)fread(hashindexbuf, 1, (size_t)statbuf.st_size, hashindexfp);
		if (ferror(hashindexfp)) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "error in quick_tcompress/fread on '%s.index'\n", hash_file);
			free(hashindexbuf);
			hashindexbuf = NULL;
			goto fail;
		}

		/*
		 * hashindexsize is one more than the number of bytes read; both the
		 * byte counted by that extra one and the byte after it are set to '\0'
		 * so that no uninitialized byte is ever handed to build_partial_hash.
		 */
		hashindexsize = nread + 1;
		hashindexbuf[nread] = '\0';
		hashindexbuf[hashindexsize] = '\0';
		fclose(hashindexfp);
		hashindexfp = NULL;

		strcpy(old_freq_file, freq_file);	/* both fit: lengths were checked above */
		strcpy(old_hash_file, hash_file);
	}
	else rewind(hashfp);	/* Don't do it first time */

	if ((len > 0) && (pattern[len-1] == '\0')) len--;	/* len > 0: never look before the start of the pattern */
	build_partial_hash(compress_hash_table, hashfp, hashindexbuf, hashindexsize, pattern, len, blocksize, loaded_hash_table);
	newlen = tcompress(pattern, len, newpattern, maxnewlen, flags);
#if	0
	printf("quick_tcompress: pat=%s len=%d newlen=%d newpat=", pattern, len, newlen);
	for (i=0; i<newlen; i++) printf("%d ", newpattern[i]);
	printf("\n");
#endif	/*0*/
	return newlen;

fail:
	/* Initialization failed: close whatever got opened so that the next call starts from scratch */
	if (hashindexfp != NULL) {
		fclose(hashindexfp);
		hashindexfp = NULL;
	}
	if (hashfp != NULL) {
		fclose(hashfp);
		hashfp = NULL;
	}
	return 0;
}

/*
 * State used by quick_tuncompress() and handed to build_partial_string().
 */
char	*compress_string_table[DEF_MAX_WORDS]; /*[MAX_WORD_LEN+2]; */
char	loaded_string_table[STRING_FILE_BLOCKS];		/* marks which pages of the string-table are already loaded: chars are stored (rather than packed bits) since it is just 4K and speed is most important */
char	*stringindexbuf;	/* malloc'd buffer holding, as consecutive unsigned shorts, the numbers that follow the blocksize in "<string_file>.index"; filled in by quick_tuncompress() */
int	stringindexsize;	/* number of bytes of stringindexbuf that are filled in; set by quick_tuncompress() */

/*
 * quick_tuncompress: uncompress a pattern, loading only those pages of the
 * string table that the word numbers in the pattern need.
 *
 * freq_file, string_file: names of the frequency and string (dictionary)
 *	files. They are (re)opened on the first call, after a failed call, or
 *	when either name differs from the one used by the previous successful
 *	initialization; otherwise the already open string file is just rewound.
 *	Each name must be shorter than MAX_LINE_LEN.
 * pattern, len: the compressed input to uncompress.
 * newpattern, maxnewlen: passed through to tuncompress().
 * flags: passed to initialize_common() and tuncompress(); error messages
 *	are printed on stderr only when TC_ERRORMSGS is set in it.
 *
 * Returns the value returned by tuncompress() (the length of the
 * uncompressed pattern filled into the user-supplied newpattern buffer),
 * or 0 if the initialization fails.
 */
int
quick_tuncompress(freq_file, string_file, pattern, len, newpattern, maxnewlen, flags)
	char	*freq_file;
	char	*string_file;
	CHAR	*pattern;
	int	len;
	void	*newpattern;	/* can be FILE* or CHAR* */
	int	*maxnewlen;
	int	flags;
{
	static FILE	*stringfp = NULL, *stringindexfp = NULL;
	static char	old_freq_file[MAX_LINE_LEN] = "", old_string_file[MAX_LINE_LEN] = "";
	static int	blocksize;
	int		newlen;

	if ((stringfp == NULL) || (strcmp(freq_file, old_freq_file) != 0) || (strcmp(string_file, old_string_file) != 0))
	{	/* Have to do some initializations */
		char	s[MAX_LINE_LEN + sizeof(".index")];
		struct stat statbuf;
		size_t	indexcap;	/* number of bytes allocated for stringindexbuf */
		int	value;

		/*
		 * The names are remembered in fixed-size static buffers and
		 * string_file is also used to build the index-file name in s[]:
		 * refuse names that would overflow them.
		 */
		if ((strlen(freq_file) >= sizeof(old_freq_file)) || (strlen(string_file) >= sizeof(old_string_file))) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "quick_tuncompress: file name too long\n");
			return 0;
		}

		if (stringfp != NULL) {
			/*
			 * NOTE(review): the previous stringindexbuf is not freed here since
			 * it is not visible from this file whether uninitialize_tuncompress()
			 * already releases it -- confirm, otherwise it leaks on every switch
			 * of dictionary.
			 */
			uninitialize_tuncompress();
			fclose(stringfp);
			stringfp = NULL;
		}
		else memset(loaded_string_table, '\0', STRING_FILE_BLOCKS);
		if (!initialize_common(freq_file, flags)) return 0;	/* don't call initialize_tuncompress since that will load the FULL string table */

		if ((stringfp = fopen(string_file, "r")) == NULL) {
			if (flags & TC_ERRORMSGS) {
				fprintf(stderr, "cannot open cast-dictionary file: %s\n", string_file);
				fprintf(stderr, "(use -H to give a dictionary-dir or run 'buildcast' to make a dictionary)\n");
			}
			return 0;
		}

		sprintf(s, "%s.index", string_file);	/* cannot overflow: length of string_file was checked above */
		if ((stringindexfp = fopen(s, "r")) == NULL) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "cannot open for reading: %s\n", s);
			goto fail;
		}

		/* The index file starts with the blocksize: fall back to the default if it is missing or 0 */
		blocksize = 0;
		if ((fscanf(stringindexfp, "%d\n", &blocksize) != 1) || (blocksize == 0)) blocksize = DEF_BLOCKSIZE;

		if (fstat(fileno(stringindexfp), &statbuf) == -1) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "error in quick_tuncompress/fstat on '%s.index'\n", string_file);
			goto fail;
		}

		indexcap = (size_t)statbuf.st_size + 1;
		if ((stringindexbuf = (char *)malloc(indexcap)) == NULL) {
			if (flags & TC_ERRORMSGS)
				fprintf(stderr, "quick_tuncompress: malloc failure!\n");
			goto fail;
		}

		/*
		 * Store each remaining number of the index file as an unsigned short.
		 * The capacity test only guards the allocation: a number takes at least
		 * two bytes in the file (digit + separator), so it cannot cut the loop
		 * short where an unsigned short is two bytes.
		 */
		stringindexsize = 0;
		while (((size_t)stringindexsize + sizeof(unsigned short) <= indexcap) &&
		       (fscanf(stringindexfp, "%d\n", &value) == 1)) {
			*((unsigned short *)(stringindexbuf+stringindexsize)) = (unsigned short)value;
			stringindexsize+=sizeof(unsigned short);
		}
		fclose(stringindexfp);
		stringindexfp = NULL;

		strcpy(old_freq_file, freq_file);	/* both fit: lengths were checked above */
		strcpy(old_string_file, string_file);
	}
	else rewind(stringfp);	/* Don't do it first time */

	build_partial_string(compress_string_table, stringfp, stringindexbuf, stringindexsize, pattern, len, blocksize, loaded_string_table);
	newlen = tuncompress(pattern, len, newpattern, maxnewlen, flags);
#if	0
	printf("quick_tuncompress: len=%d newlen=%d newpat=%s pat=", len, newlen, newpattern);
	for (i=0; i<len; i++) printf("%d ", pattern[i]);
	printf("\n");
#endif	/*0*/
	return newlen;

fail:
	/* Initialization failed: close whatever got opened so that the next call starts from scratch */
	if (stringindexfp != NULL) {
		fclose(stringindexfp);
		stringindexfp = NULL;
	}
	if (stringfp != NULL) {
		fclose(stringfp);
		stringfp = NULL;
	}
	return 0;
}