File: reg.c

package info (click to toggle)
ircii-pana 1%3A1.1-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 14,048 kB
  • ctags: 14,567
  • sloc: ansic: 130,654; sql: 6,041; makefile: 4,313; cpp: 1,270; tcl: 1,230; sh: 638; java: 151
file content (478 lines) | stat: -rw-r--r-- 11,146 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
/*
 * The original was spagetti. I have replaced Michael's code with some of
 * my own which is a thousand times more readable and can also handle '%',
 * which substitutes anything except a space. This should enable people
 * to position things better based on argument. I have also added '?', which
 * substitutes to any single character. And of course it still handles '*'.
 * this should be more efficient than the previous version too.
 *
 * Thus this whole file becomes:
 *
 * Written By Troy Rollo
 * Copyright(c) 1992
 * See the COPYRIGHT file, or do a HELP IRCII COPYRIGHT 
 */


#include "irc.h"
static char cvsrevision[] = "$Id: reg.c,v 1.1.1.1 2003/04/11 01:09:07 dan Exp $";
CVS_REVISION(reg_c)
#include "ircaux.h"
#include "output.h"
#define MAIN_SOURCE
#include "modval.h"

/*
 * The following #define is here because we *know* its behaviour.
 * The behaviour of toupper tends to be undefined when it's given
 * a non lower case letter.
 * All the systems supported by IRCII should be ASCII
 */
#define	mkupper(c)	(((c) >= 'a' && (c) <= 'z') ? ((c) - 'a' + 'A') : c)

#if 0
int old_match(const char *pattern, const char *string)
{
	char	type = 0;

	while (*string && *pattern && *pattern != '*' && *pattern != '%')
	{
		if (*pattern == '\\' && pattern[1])
		{
			if (!*++pattern || !(mkupper(*pattern) == mkupper(*string)))
				return 0;
			else
				pattern++, string++, total_explicit++;
			continue;	/* Erf! try $match(\\* *) */
		}

		if (*pattern == '?')
			pattern++, string++;
		else if (mkupper(*pattern) == mkupper(*string))
			pattern++, string++, total_explicit++;
		else
			break;
	}
	if (*pattern == '*' || *pattern == '%')
	{
		type = (*pattern++);
		while (*string)
		{
			if (old_match(pattern, string))
				return 1;
			else if (type == '*' || *string != ' ')
				string++;
			else
				break;
		}
	}

	/* Slurp up any trailing *'s or %'s... */
	if (!*string && (type == '*' || type == '%'))
		while (*pattern && (*pattern == '*' || *pattern == '%'))
			pattern++;

	if (!*string && !*pattern)
		return 1;

	return 0;
}
#endif

int new_match (const unsigned char *pattern, const unsigned char *string)
{
	int		count = 1;
	int 		asterisk = 0;
	int		percent = 0;
	const char	*last_asterisk_point = NULL;
	const char	*last_percent_point = NULL;
	int		last_asterisk_count = 0;
	int		last_percent_count = 0;
	const char	*after_wildcard = NULL;
	int		sanity = 0;
	const char	*old_pattern = pattern, *old_string = string;
	if (x_debug & DEBUG_REGEX_DEBUG)
		yell("Matching [%s] against [%s]", pattern, string);

	for (;;)
	{
		if (sanity++ > 100000)
		{
			yell("Infinite loop in match! pattern = [%s] string = [%s]", old_pattern, old_string);
			return 0;
		}

		/*
		 * If the last character in the pattern was a *, then
		 * we walk the string until we find the next instance int
		 * string, of the character that was after the *.
		 * If we get to the end of string, then obviously there
		 * is no match.  A * at the end of the pattern is handled
		 * especially, so we dont need to consider that.
		 */
		if (asterisk)
		{
			/*
			 * More pattern, no source.  Obviously this
			 * asterisk isnt going to cut it.  Try again.
			 * This replaces an 'always failure' case. 
			 * In 99% of the cases, we will try again and it
			 * will fail anyhow, but 1% of the cases it would
			 * have succeeded, so we need that retry.
			 */
			if (!*string)
				return 0;

			/*
			 * XXXX Skip over any backslashes...
			 */
			if (*pattern == '\\')
			{
				pattern++;
				if (tolower(*string) != tolower(*pattern))
					continue;
			}

			/*
			 * If the character in the pattern immediately
			 * following the asterisk is a qmark, then we
			 * save where we're at and we allow the ? to be
			 * matched.  If we find it doesnt work later on,
			 * then we will come back to here and try again.
			 *     OR 
			 * We've found the character we're looking for!
			 * Save some state information about how to recover
			 * if we dont match
			 */
			else if (*pattern == '?' || 
				(tolower(*string) == tolower(*pattern)))
			{
				asterisk = 0;
				last_asterisk_point = string;
				last_asterisk_count = count;
			}

			/*
			 * This is not the character we're looking for.
			 */
			else
				string++;

			continue;
		}

		/*
		 * Ok.  If we're dealing with a percent, but not a asterisk,
		 * then we need to look for the character after the percent.
		 * BUT, if we find a space, then we stop anyways.
		 */
		if (percent)
		{
			/*
			 * Ran out of string.  If there is more to the 
			 * pattern, then we failed.  Otherwise if the %
			 * was at the end of the pattern, we havent found
			 * a space, so it succeeds!
			 */
			if (!*string)
			{
				if (*pattern)
					return 0;
				else
					return count;
			}

			/*
			 * XXXX Skip over any backslashes...
			 */
			if (*pattern == '\\')
			{
				pattern++;
				if (tolower(*string) != tolower(*pattern))
					continue;
			}

			/*
			 * If we find a space, then we stop looking at the
			 * percent.  We're definitely done with it.  We also
			 * go back to normal parsing mode, presumably with
			 * the space after the %.
			 */
			if (*string == ' ')
			{
				percent = 0;
				last_percent_point = NULL;
			}
   
			/*
			 * If this is not the char we're looking for, then
			 * keep looking.
			 */
			else if (tolower(*string) != tolower(*pattern))
				string++;

			/*
			 * We found it!  Huzzah!
			 */
			else
			{
				percent = 0;
				last_percent_point = string;
				last_percent_count = count;
			}

			continue;
		}


		/*
		 * Ok.  So at this point, we know we're not handling an
		 * outstanding asterisk or percent request.  So we look
		 * to see what the next char is in the pattern and deal
		 * with it.
		 */
		switch (*pattern)
		{

		/*
		 * If its an asterisk, then we just keep some info about
		 * where we're at. 
		 */
		case ('*') : case ('%') :
		{
			asterisk = 0, percent = 0;
			do
			{
				if (*pattern == '*')
					asterisk = 1;
				pattern++;
			}
			while (*pattern == '*' || *pattern == '%');

			after_wildcard = pattern;
			if (asterisk)
			{
				last_asterisk_point = string;
				last_asterisk_count = count;
			}
			else
			{
				percent = 1;
				last_percent_point = string;
				last_percent_count = count;
			}

			/*
			 * If there's nothing in the pattern after the
			 * asterisk, then it slurps up the rest of string,
			 * and we're definitely done!
			 */
			if (asterisk && !*pattern)
				return count;

			break;
		}

		/*
		 * If its a question mark, then we have to slurp up one 
		 * character from the pattern and the string.
		 */
		case ('?') :
		{
			pattern++;

			/*
			 * If there is nothing left in string, then we
			 * definitely fail.
			 */
			if (!*string)
				return 0;
			string++;
			break;
		}

		/*
		 * De-quote any \'s in the pattern.
		 */
		case ('\\') :
		{
			/*
			 * ircII says that a single \ at the end of a pattern
			 * is defined as a failure. (must quote SOMETHING)
			 */
			pattern++;
			if (!*pattern)
				return 0;

			/*
			 * Check to see if the dequoted character and
			 * the next string character are the same.
			 */
			if (tolower(*pattern) != tolower(*string))
				return 0;

			count++, string++, pattern++;
			break;
		}

		/*
		 * If there is nothing left in the pattern and string,
		 * then we've definitely succeeded.  Return the number of
		 * non-wildcard characters.
		 */
		default:
		{
			if (!*pattern && !*string)
				return count;

			/*
			 * There are regular characters next in the pattern 
			 * and string.  Are they the same?  If they are, walk 
			 * past them and go to the next character.
			 */
			if (tolower(*pattern) == tolower(*string))
			{
				count++, pattern++, string++;
			}

			/*
			 * The two characters are not the same.  If we're 
			 * currently trying to match a wildcard, go back to 
			 * where we started after the wildcard and try looking
			 * again from there.  If we are not currently matching
			 * a wildcard, then the entire match definitely fails.
			 */
			else if (last_asterisk_point)
			{
                                asterisk = 1;
                                string = last_asterisk_point + 1;
                                pattern = after_wildcard;
                                count = last_asterisk_count;
			}
			else if (last_percent_point)
			{
                                percent = 1;
                                string = last_percent_point + 1;
                                pattern = after_wildcard;
                                count = last_percent_count;
			}
			else
				return 0;

			break;
		}
		}
	}

	return 0;
}

/*
 * wild_match: calculate the "value" of str when matched against pattern.
 * The "value" of a string is always zero if it is not matched by the pattern.
 * In all cases where the string is matched by the pattern, then the "value"
 * of the match is 1 plus the number of non-wildcard characters in "str".
 *
 * \\[ and \\] handling done by Jeremy Nelson
 */
int BX_wild_match (register const unsigned char *p, register const unsigned char *str)
{
	/*
	 * Is there a \[ in the pattern to be expanded? 
	 * 
	 * This stuff here just reduces the \[ \] set into a series of
	 * one-simpler patterns and then recurses over the options.
	 */
	if (strstr(p, "\\["))
	{
		char *pattern, *ptr, *ptr2, *arg, *placeholder;
		int nest = 0;

		/*
		 * Only make the copy if we're going to be tearing it apart.
		 */
		pattern = LOCAL_COPY(p);

		/*
		 * We will have to null this out, but not until we've used it
		 */
		placeholder = ptr = ptr2 = strstr(pattern, "\\[");

		/*
		 * Look for the matching \].
		 */
		do
		{
			switch (ptr[1]) 
			{
					/* step over it and add to nest */
				case '[' :  ptr2 = ptr + 2 ;
					    nest++;
					    break;
					/* step over it and remove nest */
				case ']' :  ptr2 = ptr + 2;
					    nest--;
					    break;
				default:
					    ptr2 = ptr + 2;
					    break;
			}
		}
		while (nest && (ptr = strchr(ptr2, '\\')));

		/*
		 * Right now, we know that ptr points to a \] or to a NULL.
		 * Remember that '&&' short circuits and that ptr will
		 * not be set to NULL if (nest) is zero.
		 */
		if (ptr)
		{
			int best_total = 0;

			*ptr = 0;
			ptr += 2;
			*placeholder = 0;
			placeholder += 2;

			/* 
			 * grab words ("" sets or space words) one at a time
			 * and attempt to match all of them.  The best value
			 * matched is the one used.
			 */
			while ((arg = new_next_arg(placeholder, &placeholder)))
			{
				int tmpval;
				char my_buff[BIG_BUFFER_SIZE + 1];

				strlcpy(my_buff, pattern, BIG_BUFFER_SIZE);
				strlcat(my_buff, arg, BIG_BUFFER_SIZE);
				strlcat(my_buff, ptr, BIG_BUFFER_SIZE);

				/*
				 * The total_explicit we return is whatever
				 * sub-pattern has the highest total_explicit
				 */
				if ((tmpval = wild_match(my_buff, str)))
				{
					if (tmpval > best_total)
						best_total = tmpval;
				}
			}

			return best_total; /* end of expansion section */
		}

		/*
		 * Possibly an unmatched \[ \] set.  Just wing it.
		 */
		else
			return new_match(pattern, str);
	}

	/*
	 * Trivial case -- No \[ \] sets, just do the match.
	 */
	else
		return new_match(p, str);
}