1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369
|
/*
* Copyright (c) 2002 by The XFree86 Project, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Except as contained in this notice, the name of the XFree86 Project shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in this Software without prior written authorization from the
* XFree86 Project.
*
* Author: Paulo César Pereira de Andrade
*/
/* $XFree86: xc/programs/xedit/lisp/re/rep.h,v 1.2 2002/11/15 07:01:33 paulo Exp $ */
#include "re.h"
#ifndef _rep_h
#define _rep_h
/*
* Local defines
*/
/* Smaller of two values. Any earlier definition of MIN is discarded first;
 * #undef of a name that is not currently a macro is simply ignored, so no
 * #ifdef test is required. The selected argument is evaluated twice, so do
 * not pass expressions with side effects. */
#undef MIN
#define MIN(x, y) ((y) > (x) ? (x) : (y))
/* Larger of two values. Any earlier definition of MAX is discarded first;
 * #undef of a name that is not currently a macro is simply ignored, so no
 * #ifdef test is required. The selected argument is evaluated twice, so do
 * not pass expressions with side effects. */
#undef MAX
#define MAX(x, y) ((y) < (x) ? (x) : (y))
/* This value cannot be larger than 255. A depth value is the nesting of
 * repetition operations and alternatives. The number of nested parentheses
 * does not matter, but a repetition on the pattern inside the parentheses
 * does. Note also that you cannot have more than 9 parenthesis pairs in
 * an expression.
 * Depth is always at least 1, so with MAX_DEPTH 8 only 7 complex
 * repetitions are allowed. A complex repetition is a dot followed by a
 * repetition operator. It is called a complex repetition because dot
 * matches anything but the empty string, so the engine needs to test
 * all possible combinations until the end of the string is found.
 * Repetitions like .* use one depth until the end of the string is found;
 * for example a.*b.*c.*d has depth 4, while a*b*c*d has depth 2.
 */
#define MAX_DEPTH 8
/* Minimum number of strings needed to generate a "large" string list, that
 * is, one for which the strings are sorted and 512 extra bytes are allocated
 * to map the first string with a given initial byte. */
#define LARGE_STL_COUNT 16
/*
* Local types
*/
/* Forward declarations: each typedef names a struct type before the struct
 * body is seen, so the structures can hold pointers to one another. The
 * parenthesized letters in each comment spell out how the short type name
 * is derived. */
/* Intermediate compilation types declaration */
/* (r)egular (e)xpression (c)ompile (c)a(se) */
typedef struct _rec_cse rec_cse;
/* (r)egular (e)xpression (c)ompile (r)a(ng)e */
typedef struct _rec_rng rec_rng;
/* (r)egular (e)xpression (c)ompile (pat)tern */
typedef struct _rec_pat rec_pat;
/* (r)egular (e)xpression (c)ompile (rep)etition */
typedef struct _rec_rep rec_rep;
/* (r)egular (e)xpression (c)ompile (gr)ou(p) */
typedef struct _rec_grp rec_grp;
/* (r)egular (e)xpression (c)ompile (alt)ernatives */
typedef struct _rec_alt rec_alt;
/* Optimization types */
/* (r)egular (e)xpression (c)ompile (st)ring (l)ist */
typedef struct _rec_stl rec_stl;
/* Final compilation and execution types */
/* NOTE(review): the bodies of struct _re_inf and struct _re_eng do not
 * appear in this part of the file; presumably they are defined elsewhere. */
/* (re)gular expression (inf)ormation */
typedef struct _re_inf re_inf;
/* (re)gular expression (eng)ine */
typedef struct _re_eng re_eng;
/* Codes used by the engine.
 * The enumerators have no explicit values, so they are numbered
 * sequentially from 0 in declaration order: inserting, removing or
 * reordering an entry changes the value of every entry after it. The
 * compile-time enumerations rec_pat_t, rec_rep_t and rec_stl_t take their
 * values from the codes declared here. */
typedef enum {
/* Grouping */
Re_Open, /* ( */
Re_Close, /* ) */
Re_Update, /* Like Re_Close, but is inside a loop */
/* Alternatives */
Re_Alt, /* Start alternative list, + next offset */
Re_AltNext, /* Next alternative, + next offset */
Re_AltDone, /* Finish alternative list */
/* Repetition */
Re_AnyTimes, /* * */
Re_Maybe, /* ? */
Re_AtLeast, /* +, at least one */
/* Repetition like */
Re_AnyAnyTimes, /* .*<re> */
Re_AnyMaybe, /* .?<re> */
Re_AnyAtLeast, /* .+<re> */
Re_AnyEatAnyTimes, /* Expression ends with .* */
Re_AnyEatMaybe, /* Expression ends with .? */
Re_AnyEatAtLeast, /* Expression ends with .+ */
/* Repetition with arguments */
Re_Exact, /* {e} */
Re_Min, /* {n,} */
Re_Max, /* {,m} */
Re_MinMax, /* {n,m} */
/* Repetition helper instruction */
Re_RepJump, /* Special code, go back to repetition */
Re_RepLongJump, /* Jump needs two bytes */
/* After the repetition data, all repetitions have an offset
 * to the code after the repetition */
/* Matching */
Re_Any, /* . */
Re_Odigit, /* \o */
Re_OdigitNot, /* \O */
Re_Digit, /* \d */
Re_DigitNot, /* \D */
Re_Xdigit, /* \x */
Re_XdigitNot, /* \X */
Re_Space, /* \s */
Re_SpaceNot, /* \S */
Re_Tab, /* \t */
Re_Newline, /* \n */
Re_Lower, /* \l */
Re_Upper, /* \u */
Re_Alnum, /* \w */
Re_AlnumNot, /* \W */
Re_Control, /* \c */
Re_ControlNot, /* \C */
Re_Bol, /* ^ */
Re_Eol, /* $ */
Re_Bow, /* \< */
Re_Eow, /* \> */
/* Range matching information */
Re_Range, /* + 256 bytes */
Re_RangeNot, /* + 256 bytes */
/* Matching with arguments */
Re_Literal, /* + character */
Re_CaseLiteral, /* + lower + upper */
Re_LiteralNot, /* + character */
Re_CaseLiteralNot, /* + lower + upper */
Re_String, /* + length + string */
Re_CaseString, /* + length + string in format lower-upper */
/* These are useful to start matching, or when RE_NOSPEC is used. */
Re_SearchLiteral,
Re_SearchCaseLiteral,
Re_SearchString,
Re_SearchCaseString,
Re_StringList, /* + total-length + lengths + strings */
Re_CaseStringList, /* + total-length + lengths + strings */
Re_LargeStringList, /* + total-length + lengths + map + strings */
Re_LargeCaseStringList, /* + total-length + lengths + map + strings */
/* Backreference */
Re_Backref, /* + reference number */
/* The last codes */
Re_DoneIf, /* Done if at end of input */
Re_MaybeDone, /* Done */
Re_Done /* If this code found, finished execution */
} ReCode;
/* (r)egular (e)xpression (c)ompile (pat)tern (t)ype.
 * Kind of a pattern element during compilation. Every enumerator is given
 * the value of the matching ReCode explicitly, so the order of the entries
 * below is presentational only and has no effect on their values. */
typedef enum _rec_pat_t {
    /* Grouping */
    Rep_Group = Re_Open,

    /* Dot fused with a repetition */
    Rep_AnyAnyTimes = Re_AnyAnyTimes,
    Rep_AnyMaybe = Re_AnyMaybe,
    Rep_AnyAtLeast = Re_AnyAtLeast,
    Rep_AnyEatAnyTimes = Re_AnyEatAnyTimes,
    Rep_AnyEatMaybe = Re_AnyEatMaybe,
    Rep_AnyEatAtLeast = Re_AnyEatAtLeast,

    /* Dot and character classes */
    Rep_Any = Re_Any,
    Rep_Odigit = Re_Odigit,
    Rep_OdigitNot = Re_OdigitNot,
    Rep_Digit = Re_Digit,
    Rep_DigitNot = Re_DigitNot,
    Rep_Xdigit = Re_Xdigit,
    Rep_XdigitNot = Re_XdigitNot,
    Rep_Space = Re_Space,
    Rep_SpaceNot = Re_SpaceNot,
    Rep_Tab = Re_Tab,
    Rep_Newline = Re_Newline,
    Rep_Lower = Re_Lower,
    Rep_Upper = Re_Upper,
    Rep_Alnum = Re_Alnum,
    Rep_AlnumNot = Re_AlnumNot,
    Rep_Control = Re_Control,
    Rep_ControlNot = Re_ControlNot,

    /* Line and word anchors */
    Rep_Bol = Re_Bol,
    Rep_Eol = Re_Eol,
    Rep_Bow = Re_Bow,
    Rep_Eow = Re_Eow,

    /* Character maps */
    Rep_Range = Re_Range,
    Rep_RangeNot = Re_RangeNot,

    /* Single characters */
    Rep_Literal = Re_Literal,
    Rep_CaseLiteral = Re_CaseLiteral,
    Rep_LiteralNot = Re_LiteralNot,
    Rep_CaseLiteralNot = Re_CaseLiteralNot,

    /* Strings */
    Rep_String = Re_String,
    Rep_CaseString = Re_CaseString,

    /* Search variants of literals and strings */
    Rep_SearchLiteral = Re_SearchLiteral,
    Rep_SearchCaseLiteral = Re_SearchCaseLiteral,
    Rep_SearchString = Re_SearchString,
    Rep_SearchCaseString = Re_SearchCaseString,

    /* String list */
    Rep_StringList = Re_StringList,

    /* Backreference */
    Rep_Backref = Re_Backref
} rec_pat_t;
/* (r)egular (e)xpression (rep)etition (t)ype.
 * Kind of repetition attached to a pattern during compilation. Each
 * enumerator is given the value of the matching ReCode explicitly; the
 * entries are listed here in the same order as those codes are declared. */
typedef enum _rec_rep_t {
    /* Repetition operators */
    Rer_AnyTimes = Re_AnyTimes,		/* * */
    Rer_Maybe = Re_Maybe,		/* ? */
    Rer_AtLeast = Re_AtLeast,		/* + */

    /* Repetition with arguments */
    Rer_Exact = Re_Exact,		/* {e} */
    Rer_Min = Re_Min,			/* {n,} */
    Rer_Max = Re_Max,			/* {,m} */
    Rer_MinMax = Re_MinMax		/* {n,m} */
} rec_rep_t;
/* Lower and upper case forms of a single character. Which form is which
 * is decided once, at regular expression compilation time. */
struct _rec_cse {
unsigned char lower; /* lower case form of the character */
unsigned char upper; /* upper case form of the character */
};
/* A rec_rng is used only during compilation and is just a character map.
 * The map has 256 entries, one for every value an unsigned char can take
 * when bytes are 8 bits wide.
 * NOTE(review): the meaning of each entry's value is not visible from this
 * header; presumably non-zero marks a character in the range -- confirm
 * against rep.c. */
struct _rec_rng {
unsigned char range[256];
};
/* One element of a regular expression as seen by the compiler, such as a
 * match against any character or against the beginning or end of a line.
 * A rec_pat exists only during compilation. Elements are chained in a
 * doubly linked list and are not nested at this level.
 *
 * What the data union can hold:
 *	chr:	the value of a single character to match.
 *	cse:	the lower and upper case values of a character to match.
 *	rng:	a character map to match or not match.
 *	grp:	a group, i.e. an expression enclosed in parentheses.
 *	str:	either a simple string, or a string in which every two
 *		bytes give the character to match in lower/upper case
 *		sequence.
 *	stl:	a string list.
 *
 * Strings do not use the rep field; in that case the string is broken at
 * its last character. In other words, a string is just a concatenation of
 * several single character matches.
 */
struct _rec_pat {
    rec_pat_t type;

    /* Linked list information */
    rec_pat *next;
    rec_pat *prev;

    union {
	unsigned char chr;
	rec_cse cse;
	rec_rng *rng;
	rec_grp *grp;
	unsigned char *str;
	rec_stl *stl;
    } data;

    /* Pattern repetition information */
    rec_rep *rep;
};
/* A rec_rep is used only during compilation, and describes one of the
 * repetition operators:
 *
 * ? or * or + or {<e>} or {<m>,} or {,<M>} or {<m>,<M>}
 *
 * where <e> is "exact", <m> is "minimum" and <M> is "maximum".
 * In the compiled step a pattern may also have just a NULL pointer in
 * place of a rec_rep, which is equivalent to {1}.
 */
struct _rec_rep {
rec_rep_t type; /* which repetition operator this is */
short mine; /* minimum or exact number of matches */
short maxc; /* maximum number of matches */
};
/* One alternative of an alternation, as in:
 *
 *	<re>|<re>
 *
 * where <re> is any regular expression. A rec_alt exists only during
 * compilation. Alternatives are chained in a doubly linked list, and
 * expressions are nested through the grp field of the rec_pat structure.
 */
struct _rec_alt {
    /* Linked list information */
    rec_alt *next;
    rec_alt *prev;

    /* Pattern of this alternative */
    rec_pat *pat;
};
/* A rec_grp is a placeholder for an expression enclosed in parentheses,
 * and is linked to the compilation data by a rec_pat structure (through
 * the grp member of that structure's data union). */
struct _rec_grp {
rec_pat *parent; /* Reference to parent pattern */
rec_alt *alt; /* The pattern information */
rec_alt *palt; /* Parent alternative */
rec_grp *pgrp; /* Nested groups */
int comp; /* (comp)lex repetition pattern inside group */
};
/* Optimization compilation types definition */
/* (r)egular (e)xpression (c)ompile (st)ring (l)ist (t)ype.
 * Kind of a string list during compilation. Each enumerator is given the
 * value of the matching ReCode explicitly. Only the plain and the case
 * variants are listed here; the "large" string list codes of ReCode have
 * no counterpart in this enumeration. */
typedef enum {
Resl_StringList = Re_StringList,
Resl_CaseStringList = Re_CaseStringList
} rec_stl_t;
/* A list of strings, used as an optimization during compilation.
 * NOTE(review): presumably lens and strs are parallel vectors of nstrs
 * entries each, with lens[i] the length of strs[i] -- confirm against
 * reo.c. */
struct _rec_stl {
rec_stl_t type; /* Plain or case string list */
int nstrs; /* Number of strings in list */
int tlen; /* Total length of all strings */
unsigned char *lens; /* Vector of string lengths */
unsigned char **strs; /* The strings */
};
/*
* Prototypes
*/
/* rep.c */
/* irec_comp returns a pointer to a rec_alt, the intermediate compilation
 * type for alternatives; irec_free_alt takes such a pointer.
 * NOTE(review): the parameters are unnamed and their meaning is not
 * visible from this header; presumably irec_free_alt releases what
 * irec_comp returned -- confirm against rep.c. */
rec_alt *irec_comp(const char*, const char*, int, int*);
void irec_free_alt(rec_alt*);
/* reo.c */
/* orec_comp takes a rec_alt pointer and an int and returns an int;
 * orec_free_stl takes a pointer to a rec_stl string list.
 * NOTE(review): the parameters are unnamed and neither their meaning nor
 * the meaning of the return value is visible from this header; presumably
 * orec_free_stl releases a string list -- confirm against reo.c. */
int orec_comp(rec_alt*, int);
void orec_free_stl(rec_stl*);
#endif /* _rep_h */
|