File: FXRex.h

package info (click to toggle)
gogglesmm 1.2.5-6
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 16,812 kB
  • sloc: cpp: 231,960; ansic: 893; xml: 222; makefile: 33
file content (220 lines) | stat: -rw-r--r-- 10,160 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
/********************************************************************************
*                                                                               *
*                 R e g u l a r   E x p r e s s i o n   C l a s s               *
*                                                                               *
*********************************************************************************
* Copyright (C) 1999,2023 by Jeroen van der Zijp.   All Rights Reserved.        *
*********************************************************************************
* This library is free software; you can redistribute it and/or modify          *
* it under the terms of the GNU Lesser General Public License as published by   *
* the Free Software Foundation; either version 3 of the License, or             *
* (at your option) any later version.                                           *
*                                                                               *
* This library is distributed in the hope that it will be useful,               *
* but WITHOUT ANY WARRANTY; without even the implied warranty of                *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                 *
* GNU Lesser General Public License for more details.                           *
*                                                                               *
* You should have received a copy of the GNU Lesser General Public License      *
* along with this program.  If not, see <http://www.gnu.org/licenses/>          *
********************************************************************************/
#ifndef FXREX_H
#define FXREX_H

namespace FX {


/**
* FXRex is a regular expression class implementing a NFA matcher.
* It supports capturing parentheses, non-capturing parentheses, positive or negative
* lookahead, backreferences, case-insensitive matching, counted repetitions, greedy, lazy and
* possessive matches, and PERL-like matching operators.
* The subject string may be searched forwards or backwards, and may contain any of
* 256 possible byte values.
*
* When parsing a regular expression pattern, the mode parameter is the bitwise OR of
* a set of flags and affects the match algorithm.  Passing the flag Capture enables
* capturing parentheses and back references, and allows the matcher engine to return
* the locations of the string matching these sub-patterns. The flag IgnoreCase enables
* case-insensitive matching.
*
* When the flag Newline is passed, newlines are treated like normal characters, and
* not line-separators.  If Newline flag is not passed, character classes such as '.',
* '\D', '\s', [^a-z] etc. will NOT match newlines.  The flag Verbatim disables all
* special character interpretation, making the entire pattern a literal string to be
* matched against a string.
*
* When the Exact flag is passed, a match succeeds only if the entire string is matched,
* i.e. the entire input presented to FXRex must match against the pattern; otherwise,
* only a (possibly empty) substring of the input is matched against the pattern.
* If the NotEmpty flag is passed, the pattern must match at least one character in order
* to succeed, and empty matches are considered non-matching.
*
* If the flag Syntax is passed, the pattern is only checked for correct syntax and no
* matching engine is generated; the engine is just reset to the empty pattern.  Use this
* flag to verify the syntax of the pattern without compiling it.
*
* When matching a compiled pattern, the mode parameter is the bitwise OR of a set of
* flags that affects how the match is performed.  Passing the flags NotBol and/or NotEol
* causes the begin and end of the subject string NOT to be considered a line start or
* line end.
*
* Patterns which cause inordinate amounts of recursion may cause FXRex to fail to
* match where it would otherwise succeed.
* FXRex uses no global variables, and thus multiple threads may simultaneously use it;
* moreover, multiple threads may use the same instance to perform a match.
*/
class FXAPI FXRex {
private:
  // Backing store of the expression; empty() reports whether this string is empty.
  // NOTE(review): presumably holds the compiled matcher program -- confirm in FXRex.cpp.
  FXString code;
private:
  // Table of error message strings, indexed by Error code (see getError()).
  static const FXchar *const errors[];
public:

  /// Regular expression flags; combine with bitwise OR to form a mode parameter
  enum {

    // Flags for both parse and match mode
    Normal     = 0,     ///< Normal mode (default)
    Unicode    = 1,     ///< Unicode mode

    // Regular expression parse flags
    Syntax     = 2,     ///< Perform syntax check only
    Verbatim   = 4,     ///< Literal pattern mode with no magic characters
    Capture    = 8,     ///< Perform capturing parentheses
    IgnoreCase = 16,    ///< Ignore case differences
    Newline    = 32,    ///< Match-any operators match newline too
    Exact      = 64,    ///< Exact match to entire string (\A..\Z)
    NotEmpty   = 128,   ///< A successful match must not be empty
    Reverse    = 256,   ///< Reverse expression mode
    Words      = 512,   ///< Match whole words (\<..\>)

    // Regular expression match flags
    NotBol     = 1024,  ///< Start of string is NOT begin of line
    NotEol     = 2048   ///< End of string is NOT end of line
    };

  /// Regular expression error codes
  enum Error {
    ErrOK      = 0,     ///< No errors
    ErrEmpty   = 1,     ///< Empty pattern
    ErrMore    = 2,     ///< More characters after pattern
    ErrParent  = 3,     ///< Unmatched parenthesis
    ErrBracket = 4,     ///< Unmatched bracket
    ErrBrace   = 5,     ///< Unmatched brace
    ErrRange   = 6,     ///< Bad character range
    ErrEscape  = 7,     ///< Bad escape sequence
    ErrCount   = 8,     ///< Bad counted repeat
    ErrNoAtom  = 9,     ///< No atom preceding repetition
    ErrRepeat  = 10,    ///< Repeat following repeat
    ErrBackRef = 11,    ///< Bad backward reference
    ErrClass   = 12,    ///< Bad character class
    ErrComplex = 13,    ///< Expression too complex
    ErrMemory  = 14,    ///< Out of memory
    ErrToken   = 15,    ///< Illegal token
    ErrLong    = 16,    ///< Pattern too long
    ErrSupport = 17     ///< Unsupported
    };

public:

  /**
  * Construct empty regular expression object, with the
  * fallback program installed.
  */
  FXRex();

  /**
  * Copy regular expression object from another.
  */
  FXRex(const FXRex& orig);

  /// Compile expression from pattern; mode is a bitwise OR of the parse flags; if error is not NULL, the error code is stored through it
  FXRex(const FXchar* pattern,FXint mode=Normal,Error* error=nullptr);

  /// Same as above, with the pattern supplied as an FXString
  FXRex(const FXString& pattern,FXint mode=Normal,Error* error=nullptr);

  /**
  * See if regular expression is empty; the regular expression
  * will be empty when it is unable to parse a pattern due to
  * a syntax error.
  */
  FXbool empty() const { return code.empty(); }

  /**
  * Parse pattern, return error code if syntax error is found.
  * The parse-mode flags control the compile options, and affect how
  * the generated matcher behaves.
  * If a parse fails, an error code is returned; in this case, the
  * expression matcher will be set up to a fallback program.
  * The pattern may be given either as a character pointer or as an FXString.
  */
  Error parse(const FXchar* pattern,FXint mode=Normal);
  Error parse(const FXString& pattern,FXint mode=Normal);

  /**
  * Perform anchored match of subject string of length len at position pos, returning true
  * if the pattern matches at this point.
  * If there is a match, the pattern and subpatterns are captured in the arrays beg[] and end[]
  * which must both be at least npar entries long.
  * By default beg and end are null and npar is 0.
  * The FXString overload takes no separate len parameter.
  */
  FXbool amatch(const FXchar* string,FXint len,FXint pos=0,FXint mode=Normal,FXint* beg=nullptr,FXint* end=nullptr,FXint npar=0) const;
  FXbool amatch(const FXString& string,FXint pos=0,FXint mode=Normal,FXint* beg=nullptr,FXint* end=nullptr,FXint npar=0) const;


  /**
  * Search subject string of length len for a pattern, returning the location where the pattern
  * is found relative from the start of the string, or -1 if there is no match.
  * In case of a successful match, the pattern and subpatterns are captured in the arrays beg[] and end[]
  * which must be at least npar entries long.
  * The string is searched forwards (or backwards) starting from position fm toward to, both of which
  * must lie inside the string.
  * By default beg and end are null and npar is 0.
  * The FXString overload takes no separate len parameter.
  */
  FXint search(const FXchar* string,FXint len,FXint fm,FXint to,FXint mode=Normal,FXint* beg=nullptr,FXint* end=nullptr,FXint npar=0) const;
  FXint search(const FXString& string,FXint fm,FXint to,FXint mode=Normal,FXint* beg=nullptr,FXint* end=nullptr,FXint npar=0) const;

  /**
  * After performing a regular expression match with capturing parentheses,
  * a substitution string is built from the replace string, where "&"
  * is replaced by the entire matched pattern, and "\1" through "\9" are
  * replaced by captured expressions.  The original source string and its
  * length, and the match arrays beg and end must be passed.
  * The replace string may also contain regular escape sequences to embed special
  * characters.
  * Four overloads cover the combinations of character-pointer and FXString
  * arguments for the source string and the replace string; npar defaults to 1.
  */
  static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXchar* replace,FXint npar=1);
  static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
  static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXchar* replace,FXint npar=1);
  static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);

  /// Returns error message text for a given error code.
  /// NOTE(review): err is used directly as an index into the errors table
  /// with no range check, so only valid Error values should be passed.
  static const FXchar* getError(Error err){ return errors[err]; }

  /// Assign another regular expression to this one
  FXRex& operator=(const FXRex& orig);

  /// Comparison operators
  FXbool operator==(const FXRex& rex) const;
  FXbool operator!=(const FXRex& rex) const;

  /// Saving and loading
  friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
  friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s);

  /**
  * Clear the expression object and reinstate the fallback program.
  */
  void clear();

  /// Delete
 ~FXRex();
  };


// Serialization of FXRex to and from an FXStream: operator<< saves the
// expression (taken by const reference), operator>> loads into it.
extern FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
extern FXAPI FXStream& operator>>(FXStream& store,FXRex& s);

}

#endif