1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
|
/* $Revision: 1.2 $
**
** Do shell-style pattern matching for ?, \, [], and * characters.
** Might not be robust in face of malformed patterns; e.g., "foo[a-"
** could cause a segmentation violation. It is 8bit clean.
**
** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
** Rich $alz is now <rsalz@osf.org>.
** April, 1991: Replaced mutually-recursive calls with in-line code
** for the star character.
**
** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
** This can greatly speed up failing wildcard patterns. For example:
** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
** the ABORT code, it takes 22310 calls to fail. Ugh. The following
** explanation is from Lars:
** The precondition that must be fulfilled is that DoMatch will consume
** at least one character in text. This is true if *p is neither '*' nor
** '\0'.) The last return has ABORT instead of FALSE to avoid quadratic
** behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx". With
** FALSE, each star-loop has to run to the end of the text; with ABORT
** only the last one does.
**
** Once the control of one instance of DoMatch enters the star-loop, that
** instance will return either TRUE or ABORT, and any calling instance
** will therefore return immediately after (without calling recursively
** again). In effect, only one star-loop is ever active. It would be
** possible to modify the code to maintain this context explicitly,
** eliminating all recursive calls at the cost of some complication and
** loss of clarity (and the ABORT stuff seems to be unclear enough by
** itself). I think it would be unwise to try to get this into a
** released version unless you have a good test data base to try it
** out on.
*/
#include "cs_config.h"
#include <ctype.h>
#include "neo_misc.h"
#define ABORT -1
/* What character marks an inverted character class? */
#define NEGATE_CLASS '^'
/* Is "*" a common pattern? */
#define OPTIMIZE_JUST_STAR
/* Do tar(1) matching rules, which ignore a trailing slash? */
#undef MATCH_TAR_PATTERN
/*
** Match text and p, return TRUE, FALSE, or ABORT.
*/
static int
DoMatch(register const char *text, register const char *p)
{
register int last;
register int matched;
register int reverse;
for ( ; *p; text++, p++) {
if (*text == '\0' && *p != '*')
return ABORT;
switch (*p) {
case '\\':
/* Literal match with following character. */
p++;
/* FALLTHROUGH */
default:
if (*text != *p)
return FALSE;
continue;
case '?':
/* Match anything. */
continue;
case '*':
while (*++p == '*')
/* Consecutive stars act just like one. */
continue;
if (*p == '\0')
/* Trailing star matches everything. */
return TRUE;
while (*text)
if ((matched = DoMatch(text++, p)) != FALSE)
return matched;
return ABORT;
case '[':
reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
if (reverse)
/* Inverted character class. */
p++;
matched = FALSE;
if (p[1] == ']' || p[1] == '-')
if (*++p == *text)
matched = TRUE;
for (last = *p; *++p && *p != ']'; last = *p)
/* This next line requires a good C compiler. */
if (*p == '-' && p[1] != ']'
? *text <= *++p && *text >= last : *text == *p)
matched = TRUE;
if (matched == reverse)
return FALSE;
continue;
}
}
#ifdef MATCH_TAR_PATTERN
if (*text == '/')
return TRUE;
#endif /* MATCH_TAR_ATTERN */
return *text == '\0';
}
/*
** Match text and p, return TRUE, FALSE, or ABORT.
*/
static int
DoMatchCaseInsensitive(register const char *text, register const char *p)
{
register int last;
register int matched;
register int reverse;
for ( ; *p; text++, p++) {
if (*text == '\0' && *p != '*')
return ABORT;
switch (*p) {
case '\\':
/* Literal match with following character. */
p++;
/* FALLTHROUGH */
default:
if (toupper(*text) != toupper(*p))
return FALSE;
continue;
case '?':
/* Match anything. */
continue;
case '*':
while (*++p == '*')
/* Consecutive stars act just like one. */
continue;
if (*p == '\0')
/* Trailing star matches everything. */
return TRUE;
while (*text)
if ((matched = DoMatchCaseInsensitive(text++, p)) != FALSE)
return matched;
return ABORT;
case '[':
reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
if (reverse)
/* Inverted character class. */
p++;
matched = FALSE;
if (p[1] == ']' || p[1] == '-')
if (toupper(*++p) == toupper(*text))
matched = TRUE;
for (last = toupper(*p); *++p && *p != ']'; last = toupper(*p))
/* This next line requires a good C compiler. */
if (*p == '-' && p[1] != ']'
? toupper(*text) <= toupper(*++p) && toupper(*text) >= last : toupper(*text) == toupper(*p))
matched = TRUE;
if (matched == reverse)
return FALSE;
continue;
}
}
#ifdef MATCH_TAR_PATTERN
if (*text == '/')
return TRUE;
#endif /* MATCH_TAR_ATTERN */
return *text == '\0';
}
/*
** User-level routine. Returns TRUE or FALSE.
*/
int
wildmat(const char *text, const char *p)
{
#ifdef OPTIMIZE_JUST_STAR
if (p[0] == '*' && p[1] == '\0')
return TRUE;
#endif /* OPTIMIZE_JUST_STAR */
return DoMatch(text, p) == TRUE;
}
/*
** User-level routine. Returns TRUE or FALSE.
*/
int
wildmatcase(const char *text, const char *p)
{
#ifdef OPTIMIZE_JUST_STAR
if (p[0] == '*' && p[1] == '\0')
return TRUE;
#endif /* OPTIMIZE_JUST_STAR */
return DoMatchCaseInsensitive(text, p) == TRUE;
}
|