File: config.h

package info (click to toggle)
swish++ 6.1.5-2
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 2,256 kB
  • ctags: 1,759
  • sloc: ansic: 11,931; lisp: 804; sh: 629; perl: 366; makefile: 80
file content (253 lines) | stat: -rw-r--r-- 11,924 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
/*
**      SWISH++
**      config.h
**
**      Copyright (C) 1998  Paul J. Lucas
**
**      This program is free software; you can redistribute it and/or modify
**      it under the terms of the GNU General Public License as published by
**      the Free Software Foundation; either version 2 of the License, or
**      (at your option) any later version.
**
**      This program is distributed in the hope that it will be useful,
**      but WITHOUT ANY WARRANTY; without even the implied warranty of
**      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**      GNU General Public License for more details.
**
**      You should have received a copy of the GNU General Public License
**      along with this program; if not, write to the Free Software
**      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef config_H
#define config_H

////////// Word determination /////////////////////////////////////////////////

const int   Word_Hard_Min_Size          = 3;
const int   Word_Hard_Max_Size          = 25;
//          Hard lower and upper limits on the length of a word: a word has to
//          fall within them before it is even worth running the more
//          aggressive checks that determine whether it should be indexed.

const int   Word_Min_Size               = 4;
//          Minimum length a non-acronym word needs in order to be considered
//          for indexing.

const int   Word_Min_Vowels             = 1;
//          Minimum number of vowels a word needs in order to be indexed.

const int   Word_Hex_Max_Size           = 4;
//          Maximum length that a string made up entirely of hexadecimal
//          digits, i.e., ASCII hex data, can have before it is discarded.
//          Keep in mind that "cafe" is a legitimate English word made up
//          entirely of hexadecimal digits.  Only extract(1), in extract.c,
//          uses this parameter.

// There is probably no word in English that has more than...

const int   Word_Max_Consec_Consonants  = 7;
//          ...this many consonants in a row (e.g., "symphysis")

const int   Word_Max_Consec_Vowels      = 5;
//          ...this many vowels in a row (e.g., "queueing")

const int   Word_Max_Consec_Same        = 2;
//          ...this many occurrences of the same alphabetic character in a row

const int   Word_Max_Consec_Puncts      = 1;
//          ...this many punctuation characters in a row

// The sets of characters allowed in words.  Letters must be lower case, so
// listing the upper case letters as well would be redundant.
//
const char  Word_Chars[] = "&'-0123456789abcdefghijklmnopqrstuvwxyz_";
//          Every character that may occur in a word.  The '&' is included so
//          that an acronym such as "AT&T" stays a single word.  In contrast
//          to SWISH-E, ';' is not needed here for character entity references
//          to be recognized and converted.

#define     OPTIMIZE_WORD_CHARS         1
//          Set this macro to 1 when the default character set (the
//          alphanumerics plus "&'-_") is in use: is_word_char() is then
//          optimized, for a performance improvement of about 10%.  With a
//          different character set, the optimization can be kept by editing
//          that function to match.  See word_util.h for details.

const char  Word_Begin_Chars[] = "0123456789abcdefghijklmnopqrstuvwxyz";
//          Every character that may start a word; this should be a subset of
//          Word_Chars.

#define     OPTIMIZE_WORD_BEGIN_CHARS   1
//          Works the same way as OPTIMIZE_WORD_CHARS.

const char  Word_End_Chars[] = "0123456789abcdefghijklmnopqrstuvwxyz";
//          Every character that may finish a word; usually identical to
//          Word_Begin_Chars.

#define     OPTIMIZE_WORD_END_CHARS     1
//          Works the same way as OPTIMIZE_WORD_CHARS.

#ifdef  SEARCH_DAEMON
////////// Search server daemon parameters ////////////////////////////////////

const char  SocketFile_Default[]        = "/tmp/search.socket";
//          Name used for the Unix domain socket file unless a config. file or
//          the command line says otherwise.

const int   SocketPort_Default          = 1967;
//          Port number used for the TCP socket unless a config. file or the
//          command line says otherwise.

const int   SocketQueueSize_Default     = 511;
//          Upper limit on the number of connections queued for a socket.
//          [Stevens 1998], p. 96, has this to say:
//
//              Historically, sample code always shows a backlog of 5,
//              as that was the maximum value supported by 4.2BSD.
//              This was adequate in the 1980s when busy servers would
//              handle only a few hundred connections per day.  But
//              with the growth of the World Wide Web (WWW), where busy
//              servers handle millions of connections per day, this
//              small number is completely inadequate.  Busy HTTP
//              servers must specify a much larger backlog, and newer
//              kernels must support larger values.
//
//          Stevens, unfortunately, does not go on to name a good value.  The
//          default of 511 comes from httpd.h in Apache:
//
//              It defaults to 511 instead of 512 because some systems
//              store it as an 8-bit datatype; 512 truncated to 8-bits
//              is 0, while 511 is 255 when truncated.
//
//          If it's good enough for Apache, it's good enough for us.  A
//          config. file or the command line can override this value.

const int   SocketTimeout_Default       = 10;   // seconds
//          How many seconds a client is given to complete a search request
//          before it gets disconnected.  The limit keeps a client that
//          connects but never completes its request from making the thread
//          that services it wait forever.  A config. file or the command line
//          can override this value.

const int   ThreadsMin_Default          = 5;
//          Minimum number of simultaneous threads unless a config. file or
//          the command line says otherwise.

const int   ThreadsMax_Default          = 100;
//          Maximum number of simultaneous threads unless a config. file or
//          the command line says otherwise.

const int   ThreadTimeout_Default       = 30;   // seconds
//          How many seconds a spare thread may sit idle before it times out
//          and destroys itself.  A config. file or the command line can
//          override this value.

const char  User_Default[]              = "nobody";
const char  Group_Default[]             = "nobody";
//          User and group that are switched to once initialization is done
//          (provided the process was root to begin with).  A config. file or
//          the command line can override these values.
#endif

////////// Miscellaneous parameters ///////////////////////////////////////////

const char  ConfigFile_Default[]        = "swish++.conf";
//          Name of the configuration file unless the command line says
//          otherwise.

const char  ExtractExtension_Default[]  = "txt";
//          Extension appended to filenames during extraction unless a config.
//          file or the command line says otherwise.

const int   FilesGrow_Default           = 100;
//          Number of files by which reserved space is grown when indexing
//          incrementally, unless a config. file or the command line says
//          otherwise.

const int   FilesReserve_Default        = 1000;
//          Maximum number of files for which space is reserved, unless a
//          config. file or the command line says otherwise; file_info.c has
//          the details.

const int   Fork_Attempts               = 5;
//          How many times a fork is tried before giving up.  Only filter.c
//          uses this parameter.

const int   Fork_Sleep                  = 5;    // seconds
//          How many seconds to sleep before trying to fork again.  Only
//          filter.c uses this parameter.

const char  IndexFile_Default[]         = "swish++.index";
//          Name of the index file that is generated/searched unless a config.
//          file or the command line says otherwise.

const int   ResultsMax_Default          = 100;
//          Maximum number of search results unless a config. file or the
//          command line says otherwise.

const char  ShellFilenameDelimChars[]   = " \t&;<>|";
//          The characters that delimit file names within a Unix shell
//          command.  Note the wording: "file" names, not "path" names.

char const  ShellFilenameEscapeChars[]  = "\t !\"#$&'()*/;<>?[\\]^`{|}~";
//          Characters in a file name that must be escaped when passed to a
//          Unix shell.  This is a superset of what are commonly referred to as
//          "meta-characters" because the space and tab characters are
//          included.  The tab is listed explicitly (first, which keeps the
//          set in ASCII order) because ShellFilenameDelimChars treats it as a
//          delimiter too, so a tab inside a file name needs escaping just as
//          a space does.  Note again that this says "file" (not "path") name.

// Selects the default temporary directory.  When __CYGWIN__ is defined the
// value is "/temp".  Otherwise the two #error directives below deliberately
// stop compilation until TempDirectory_Default has been set for the target
// system and the directives have been commented out, as their messages
// instruct.
#ifdef  __CYGWIN__
char const  TempDirectory_Default[]     = "/temp";
#else
#error      You have not set TempDirectory_Default for your system.
#error      Comment out these lines after you have set it.

char const  TempDirectory_Default[]     = "/tmp";
#endif
//          Default directory to use for temporary files during indexing.  If
//          your OS mounts swap space via /tmp (e.g., Solaris), as indexing
//          progresses and more files get created in /tmp, you will have less
//          swap space, indexing will get slower, and you may run out of
//          memory.  If this is the case, you can either change this default
//          here for all users (preferred) or override it either in a config.
//          file or on the command line to use a directory on a real
//          filesystem, i.e., one on a physical disk, e.g., /var/tmp on some
//          OSs.  The directory must exist.

const int   TitleLines_Default          = 12;
//          Maximum number of lines into a file to go for its "title"
//          (whatever that means for a given file format), unless a config.
//          file or the command line says otherwise.

const int   Title_Max_Size              = 200;
//          Maximum length that the "title" of a file (whatever that means for
//          a given file format) may have.

#ifdef  FEATURE_word_pos
const int   WordsNear_Default           = 10;
//          Greatest number of words by which two words may be separated and
//          still be considered "near" each other, unless a config. file or
//          the command line says otherwise.
#endif

const int   WordPercentMax_Default      = 100;
//          Maximum percentage of files in which a word may occur before it is
//          discarded as too frequent, unless a config. file or the command
//          line says otherwise.

// The two #error directives below deliberately stop compilation until
// WordThreshold_Default has been set for the target system and the directives
// have been commented out, as their messages instruct.
#error      You have not set WordThreshold_Default for your system.
#error      Comment out these lines after you have set it.

int const   WordThreshold_Default       = 250000;
//          The word count past which partial indices are generated and merged
//          since all the words are too big to fit into memory at the same
//          time.  If you index and your machine begins to swap like mad, lower
//          this value.  The above works OK in a 64MB machine.  A rule of thumb
//          is to add 250000 words for each additional 64MB of RAM you have.
//          These numbers are for a SPARC machine running Solaris.  Other
//          machines running other operating systems use memory differently.
//          You simply have to experiment.  Only the super-user can increase
//          this either in a config. file or on the command line.

#endif  /* config_H */
/* vim:set et sw=4 ts=4: */