File: config.h

package info (click to toggle)
swish++ 1.1b3-3
  • links: PTS
  • area: main
  • in suites: slink
  • size: 416 kB
  • ctags: 409
  • sloc: ansic: 2,842; makefile: 247; sh: 48
file content (111 lines) | stat: -rw-r--r-- 3,981 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*
**	SWISH++
**	config.h
**
**	Copyright (C) 1998  Paul J. Lucas
**
**	This program is free software; you can redistribute it and/or modify
**	it under the terms of the GNU General Public License as published by
**	the Free Software Foundation; either version 2 of the License, or
**	(at your option) any later version.
** 
**	This program is distributed in the hope that it will be useful,
**	but WITHOUT ANY WARRANTY; without even the implied warranty of
**	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**	GNU General Public License for more details.
** 
**	You should have received a copy of the GNU General Public License
**	along with this program; if not, write to the Free Software
**	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef	config_H
#define	config_H

////////// Word determination /////////////////////////////////////////////////

const int	Word_Hard_Min_Size		= 3;
const int	Word_Hard_Max_Size		= 25;
//		Hard lower and upper bounds on word length.  A word whose
//		length falls outside this range is not even worth running the
//		more aggressive should-this-be-indexed checks on.

const int	Word_Min_Size			= 4;
//		A word that is not an acronym must be at least this long
//		before it is considered for indexing.

const int	Word_Hex_Min_Size		= 5;
//		Minimum length at which a string consisting solely of
//		hexadecimal digits (i.e., ASCII hex data) gets discarded.
//		Shorter all-hex strings are not discarded on this basis:
//		"cafe", for instance, is a legitimate English word made up
//		entirely of hexadecimal digits.  Only extract.c uses this
//		parameter.

// Limits on character runs within a word.  The author's belief is that no
// English word has more than...

const int	Word_Max_Consec_Consonants	= 5;
//		...5 consonants in a row,

const int	Word_Max_Consec_Vowels		= 4;
//		...4 vowels in a row (as in "queue"), or

const int	Word_Max_Consec_Same		= 2;
//		...2 occurrences of the same alphabetic character in a row.

// The character sets that define what a word may contain.  '&' is included so
// that an acronym such as "AT&T" stays a single word.  In contrast to SWISH-E,
// ';' need not be listed for character entity references to be recognized
// and converted.
//
const char	Word_Chars[] = "&'-0123456789abcdefghijklmnopqrstuvwxyz_";
		// Every character allowed anywhere inside a word.

const char	Word_Begin_Chars[] = "0123456789abcdefghijklmnopqrstuvwxyz";
		// Characters allowed as the first character of a word; this
		// should be a subset of Word_Chars.

const char	Word_End_Chars[] = "0123456789abcdefghijklmnopqrstuvwxyz";
		// Characters allowed as the last character of a word; usually
		// identical to Word_Begin_Chars.

////////// HTML file parameters ///////////////////////////////////////////////

const int	Title_Lines			= 12;
//		How many lines from the top of a file, at most, are examined
//		when looking for HTML <TITLE> tags.

const int	Title_Max_Size			= 200;
//		The longest a title may be.

////////// Miscellaneous parameters ///////////////////////////////////////////

const int	Entity_Max_Size			= 7;
//		Upper bound on the size of an entity reference such as
//		"&eacute;".  The leading '&' and the trailing ';' are NOT
//		included in the count.

const int	Files_Default			= 1000;
//		The default for the maximum number of files that space is
//		pre-allocated for; file_info::operator new() in file_info.c
//		has the details.

const int	Fork_Attempts			= 5;
//		How many fork attempts are made before giving up.

const int	Fork_Sleep			= 5;
//		How long to sleep, in seconds, before trying to fork again.

const int	Results_Max_Default		= 100;
//		The maximum number of results unless a different value is
//		given on the command line.

const char	Tmp_Dir[]			= "/tmp/";
//		Where temporary files are placed.  The trailing '/' is
//		REQUIRED.

const int	Word_Threshold			= 250000;
//		Once the word count passes this value, partial indices are
//		generated and later merged, because the words are too big to
//		fit into memory.  Lower this value if indexing makes your
//		machine swap like mad.  The value above works OK on a 64MB
//		machine; as a rule of thumb, add 250000 words for every
//		additional 64MB of RAM.

#endif	/* config_H */