1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
/* Revision-control Id keyword string recording this file's name, revision and date; not referenced by the code visible in this file. */
static const char rcsid[] = "$Id: wildmat.c,v 1.6 2004/08/11 15:11:21 will Exp $";
/*
**
** Do shell-style pattern matching for ?, \, [], and * characters.
** Might not be robust in face of malformed patterns; e.g., "foo[a-"
** could cause a segmentation violation. It is 8bit clean.
**
** Modified 3 Mar 1993 by Will Deich, will@surya.caltech.edu:
** A leading ^ means to complement the match; that is,
** wildmat(string, "^"pat) returns TRUE if string does NOT
** match the pattern.
** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
** Rich $alz is now <rsalz@osf.org>.
** April, 1991: Replaced mutually-recursive calls with in-line code
** for the star character.
**
** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
** This can greatly speed up failing wildcard patterns. For example:
** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
** the ABORT code, it takes 22310 calls to fail. Ugh. The following
** explanation is from Lars:
** The precondition that must be fulfilled is that DoMatch will consume
** at least one character in text. This is true if *p is neither '*' nor
** '\0'. The last return has ABORT instead of FALSE to avoid quadratic
** behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx". With
** FALSE, each star-loop has to run to the end of the text; with ABORT
** only the last one does.
**
** Once the control of one instance of DoMatch enters the star-loop, that
** instance will return either TRUE or ABORT, and any calling instance
** will therefore return immediately after (without calling recursively
** again). In effect, only one star-loop is ever active. It would be
** possible to modify the code to maintain this context explicitly,
** eliminating all recursive calls at the cost of some complication and
** loss of clarity (and the ABORT stuff seems to be unclear enough by
** itself). I think it would be unwise to try to get this into a
** released version unless you have a good test data base to try it out
** on.
*/
#ifdef TEST
#include <stdio.h>
#endif
/* Result codes.  ABORT is internal to DoMatch(); wildmat() never returns it. */
#define TRUE 1
#define FALSE 0
#define ABORT (-1)
/* What character marks an inverted character class? */
#define NEGATE_CLASS '^'
/* Is "*" a common pattern? */
#define OPTIMIZE_JUST_STAR
/* Do tar(1) matching rules, which ignore a trailing slash? */
#undef MATCH_TAR_PATTERN
/*
** Match text against pattern p; return TRUE, FALSE, or ABORT.
**
**   TRUE   all of text is matched by all of p.
**   FALSE  mismatch at this position; an enclosing star-loop may retry
**          further along in text.
**   ABORT  no retry can succeed: text ran out before the pattern did, a
**          star-loop ran off the end of text, or the pattern contains a
**          malformed character class (no closing ']', or a range such as
**          "[a-" with no upper bound).
**
** Neither string is modified.  Character-class ranges are compared as
** unsigned char so that ranges reaching above 0x7F behave the same
** whether plain char is signed or unsigned.
*/
static int
DoMatch(const char *text, const char *p)
{
    int matched;
    int reverse;
    unsigned char ch;
    unsigned char last;

    for ( ; *p; text++, p++) {
        /* Every pattern element except '*' must consume one text char. */
        if (*text == '\0' && *p != '*')
            return ABORT;
        switch (*p) {
        case '\\':
            /* Literal match with following character.  A trailing
            ** backslash leaves *p == '\0', which cannot equal *text
            ** (known to be non-NUL here), so it fails cleanly below. */
            p++;
            /* FALLTHROUGH */
        default:
            if (*text != *p) {
#ifdef VERBOSETEST
                printf("fail:text=%c != pat=%c\n", *text, *p);
#endif
                return FALSE;
            }
#ifdef VERBOSETEST
            printf("match:text=%c == pat=%c\n", *text, *p);
#endif
            continue;
        case '?':
            /* Match anything. */
#ifdef VERBOSETEST
            printf("match:text=%c ; pat=%c\n", *text, *p);
#endif
            continue;
        case '*':
            while (*++p == '*')
                /* Consecutive stars act just like one. */
                continue;
            if (*p == '\0') {
                /* Trailing star matches everything. */
#ifdef VERBOSETEST
                printf("trailing * matches everything\n");
#endif
                return TRUE;
            }
            /* Try the rest of the pattern at each remaining position.
            ** TRUE and ABORT both end the search immediately. */
            while (*text) {
                if ((matched = DoMatch(text++, p)) != FALSE) {
                    return matched;
                }
            }
#ifdef VERBOSETEST
            printf("ABORT!\n");
#endif
            return ABORT;
        case '[':
            reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
#ifdef VERBOSETEST
            printf("char class\n");
#endif
            if (reverse) {
                /* Inverted character class. */
                p++;
#ifdef VERBOSETEST
                printf("(negated)\n");
#endif
            }
            matched = FALSE;
            ch = (unsigned char)*text;
            /* A ']' or '-' in first position is an ordinary member. */
            if (p[1] == ']' || p[1] == '-') {
                if (*++p == *text)
                    matched = TRUE;
            }
            for (last = (unsigned char)*p; *++p && *p != ']';
                 last = (unsigned char)*p) {
                int hit;

                if (*p == '-' && p[1] != ']') {
                    /* "x-" at end of pattern: range has no upper bound. */
                    if (p[1] == '\0')
                        return ABORT;
                    p++;
                    hit = ch >= last && ch <= (unsigned char)*p;
                } else {
                    hit = *text == *p;
                }
                if (hit) {
                    matched = TRUE;
#ifdef VERBOSETEST
                    printf("in char class, %c matched %c or maybe %c\n",
                        *text, *p, *(p-1));
#endif
                }
            }
            /* Ran off the pattern without finding ']': malformed class.
            ** Stop here so the outer loop never steps p past the NUL. */
            if (*p == '\0')
                return ABORT;
            if (matched == reverse) {
#ifdef VERBOSETEST
                printf("failed match to class\n");
#endif
                return FALSE;
            }
            continue;
        }
    }
#ifdef MATCH_TAR_PATTERN
    if (*text == '/')
        return TRUE;
#endif /* MATCH_TAR_PATTERN */
    /* Pattern exhausted: it is a match only if text is exhausted too. */
    return *text == '\0';
}
/*
** User-level routine.  Returns TRUE if text matches pattern p, else FALSE.
**
** A leading '^' in p complements the result: the remainder of p is used
** as the pattern and TRUE is returned exactly when text does NOT match it.
** DoMatch()'s ABORT result is folded into "no match" here, so callers
** only ever see TRUE or FALSE.  Neither string is modified.
*/
int
wildmat(char *text, char *p)
{
#ifdef OPTIMIZE_JUST_STAR
    /* A lone "*" matches any text; skip the matcher entirely. */
    if (p[0] == '*' && p[1] == '\0')
        return TRUE;
#endif /* OPTIMIZE_JUST_STAR */

    if (p[0] == '^')
        return DoMatch(text, p + 1) != TRUE;
    return DoMatch(text, p) == TRUE;
}
#ifdef TEST
/*
** Read one line from stdin into buf (at most size-1 characters) and drop
** the trailing newline.  An over-long line is truncated and its excess is
** discarded so it cannot spill into the next read.  Returns buf, or NULL
** at end of file or on a read error.
*/
static char *
ReadLine(char *buf, int size)
{
    char *s;
    int c;

    if (fgets(buf, size, stdin) == NULL)
        return NULL;
    for (s = buf; *s != '\0' && *s != '\n'; s++)
        continue;
    if (*s == '\n') {
        *s = '\0';
    } else {
        /* No newline stored: swallow the rest of the over-long line. */
        while ((c = getchar()) != EOF && c != '\n')
            continue;
    }
    return buf;
}

/*
** Interactive tester, built only when TEST is defined.  Reads a pattern,
** then reports YES/NO for each text line until a blank line asks for a
** new pattern.  A blank pattern or end of input ends the program.
*/
int
main(void)
{
    char p[80];
    char text[80];

    printf("Wildmat tester. Enter pattern, then strings to test.\n");
    printf("A blank line gets prompts for a new pattern; a blank pattern\n");
    printf("exits the program.\n");
    for ( ; ; ) {
        printf("\nEnter pattern: ");
        (void)fflush(stdout);
        if (ReadLine(p, (int)sizeof p) == NULL || p[0] == '\0')
            break;
        for ( ; ; ) {
            printf("Enter text: ");
            (void)fflush(stdout);
            if (ReadLine(text, (int)sizeof text) == NULL)
                return 0;
            if (text[0] == '\0')
                /* Blank line; go back and get a new pattern. */
                break;
            printf(" %s\n", wildmat(text, p) ? "YES" : "NO");
        }
    }
    return 0;
}
#endif /* defined(TEST) */
|