File: regexp_facade.h

package info (click to toggle)
mysql-8.0 8.0.44-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,272,892 kB
  • sloc: cpp: 4,685,345; ansic: 412,712; pascal: 108,395; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; python: 21,816; sh: 17,285; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,083; makefile: 1,793; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (199 lines) | stat: -rw-r--r-- 6,970 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
#ifndef SQL_REGEXP_REGEXP_FACADE_H_
#define SQL_REGEXP_REGEXP_FACADE_H_

/* Copyright (c) 2017, 2025, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is designed to work with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have either included with
   the program or referenced in the documentation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/**
  @file regexp_facade.h

  This file hides most of ICU from the Item_func_regexp subclasses.
*/

#include <stdint.h>

#include <optional>
#include <string>

#include "sql/item.h"
#include "sql/regexp/regexp_engine.h"
#include "sql_string.h"

// Declared here, defined in another translation unit (the definitions are not
// visible from this header).
// NOTE(review): judging by the names, these are presumably the configured
// limits on regexp match time and stack usage — confirm units and semantics
// at the definition site.
extern int32_t opt_regexp_time_limit;
extern int32_t opt_regexp_stack_limit;

namespace regexp {

/**
  This class handles

  - Conversion to the regexp library's character set, and buffers the
    converted strings during matching.

  - Re-compilation of the regular expression in case the pattern is a field
    reference or otherwise non-constant.

  - `NULL` handling.

  - Conversion between indexing conventions. Clients of this class can use
    one-based indexing, while the classes used by this class use zero-based
    indexing.
*/
class Regexp_facade {
 public:
  /**
    Makes sure a pattern is set before matching.

    The pattern is set when this function is called for the first time, or
    whenever pattern_expr is non-constant. It is intended to be invoked once
    per row. For a statement such as

      SELECT regexp_like( column, 'a+' ) FROM table;

    the pattern is constant, so every call after the first one is a no-op
    and costs the caller nothing. For a statement such as

      SELECT regexp_like( column, regexp_column ) FROM table;

    `regexp_column` is non-constant, which forces the regular expression to
    be recompiled for each row.

    @param pattern_expr Expression supplying the pattern.
    @param flags Flags for the compilation. NOTE(review): the meaning of the
    individual bits is not visible in this header.

    @return NOTE(review): the success/failure convention is not stated in
    this header; Reset() uses false for OK — confirm it is the same here.
  */
  bool SetPattern(Item *pattern_expr, uint32_t flags);

  /**
    Attempts to match the subject against the compiled regular expression.

    @param subject_expr Is evaluated into the string to search.
    @param start Start position, 1-based.
    @param occurrence Which occurrence of the pattern to look for.

    @retval true A match was found.
    @retval false No match was found.

    @return An empty optional if either the engine was not compiled, or
    subject_expr evaluates to NULL. Handling that case here in the facade
    means the Item_func_regexp object does not need a special case for a
    regular expression that is NULL.
  */
  std::optional<bool> Matches(Item *subject_expr, int start, int occurrence);

  /**
    Looks for a match of the compiled regular expression in the subject and
    reports a position.

    @param subject_expr The string to search.
    @param start Start position, 1-based.
    @param occurrence Which occurrence of the pattern to look for.
    @param after_match Selects which position is reported: if true, the
    position following the end of the match; if false, the position before
    the match.

    @return The position selected by after_match, or an empty optional if
    no match was found.
  */
  std::optional<int> Find(Item *subject_expr, int start, int occurrence,
                          bool after_match);

  /**
    Search-and-replace on the subject.

    @param subject_expr The string to search.
    @param replacement_expr The string that replaces the match.
    @param start Start position, 1-based.
    @param occurrence Which occurrence of the pattern to look for.
    @param[in,out] result Supplies the buffer the result is written to.

    @return NOTE(review): not documented in this header; presumably the
    result string, or nullptr on NULL/failure — confirm in the
    implementation.
  */
  String *Replace(Item *subject_expr, Item *replacement_expr, int start,
                  int occurrence, String *result);

  /**
    NOTE(review): this function carries no documentation in the header. By
    analogy with the sibling functions, the parameters are presumably:

    @param subject_expr The string to search.
    @param start Start position, 1-based.
    @param occurrence Which occurrence of the pattern to look for.
    @param result Buffer for the result.

    Confirm the parameters and the return value in the implementation.
  */
  String *Substr(Item *subject_expr, int start, int occurrence, String *result);

  /// Releases the "engine" data structure once execution is over.
  void cleanup() { m_engine.reset(); }

  /// True if an engine exists and reports a warning. For unit testing.
  bool EngineHasWarning() const {
    if (m_engine == nullptr) return false;
    return m_engine->HasWarning();
  }

 private:
  /**
    Points the compiled regular expression at a new subject string.

    @param subject_expr The new string to search.
    @param start If given, start on this code point.

    @retval false OK.
    @retval true Either no regular expression has been compiled, or the
    expression evaluated to `NULL`.
  */
  bool Reset(Item *subject_expr, int start = 1);

  /**
    Performs the actual compilation of the regular expression.

    @param pattern_expr Expression supplying the pattern.
    @param flags Flags for the compilation.
  */
  bool SetupEngine(Item *pattern_expr, uint flags);

  /**
    Translates a position in m_current_subject from the client's convention
    to the library's.

    @param position One-based code point position.
    @return Zero-based position as used by the regexp library.
    NOTE(review): the previous comment called this a "byte position", but
    m_current_subject is a std::u16string and the inverse conversion below
    is documented in UTF-16 positions — confirm the unit.
  */
  int ConvertCodePointToLibPosition(int position) const;

  /**
    Translates a position in m_current_subject from the library's convention
    to code points.

    @param position Zero-based UTF-16 position.
    @return Zero-based code point position.
  */
  int ConvertLibPositionToCodePoint(int position) const;

  /**
    Common helper for the SQL regular expression functions that return a
    string: stores the function's output in the result string. Depending on
    the character sets of the arguments and of the result, the data is
    copied, converted, or simply set. Among other things this covers the
    special case where the BINARY character set is interpreted as CP-1252.

    @param str The result string from the regexp function.
    @param length Length in bytes.
    @param[out] result The result string.
    @return A pointer to the same string as the argument, or nullptr in case
    of failure.
  */
  String *AssignResult(const char *str, size_t length, String *result);

  /**
    The engine behind all actual regular expression work: matching,
    search-and-replace, and positional and string information. The pointer
    is empty if either the regular expression pattern or the subject is
    `NULL`.
  */
  unique_ptr_destroy_only<Regexp_engine> m_engine;

  /**
    Buffer holding the current subject. It lives here because ICU does not
    take a copy of the subject string. Calling Reset() overwrites it.

    @see Regexp_engine::reset()
  */
  std::u16string m_current_subject;
};

}  // namespace regexp

#endif  // SQL_REGEXP_REGEXP_FACADE_H_