1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
|
/*
* Cppcheck - A tool for static C/C++ code analysis
* Copyright (C) 2007-2025 Cppcheck team.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef HAVE_RULES
#include "regex.h"
#include <utility>
#ifdef _WIN32
#define PCRE_STATIC
#endif
#include <pcre.h>
namespace {
std::string pcreErrorCodeToString(const int pcreExecRet)
{
switch (pcreExecRet) {
case PCRE_ERROR_NULL:
return "Either code or subject was passed as NULL, or ovector was NULL "
"and ovecsize was not zero (PCRE_ERROR_NULL)";
case PCRE_ERROR_BADOPTION:
return "An unrecognized bit was set in the options argument (PCRE_ERROR_BADOPTION)";
case PCRE_ERROR_BADMAGIC:
return "PCRE stores a 4-byte \"magic number\" at the start of the compiled code, "
"to catch the case when it is passed a junk pointer and to detect when a "
"pattern that was compiled in an environment of one endianness is run in "
"an environment with the other endianness. This is the error that PCRE "
"gives when the magic number is not present (PCRE_ERROR_BADMAGIC)";
case PCRE_ERROR_UNKNOWN_NODE:
return "While running the pattern match, an unknown item was encountered in the "
"compiled pattern. This error could be caused by a bug in PCRE or by "
"overwriting of the compiled pattern (PCRE_ERROR_UNKNOWN_NODE)";
case PCRE_ERROR_NOMEMORY:
return "If a pattern contains back references, but the ovector that is passed "
"to pcre_exec() is not big enough to remember the referenced substrings, "
"PCRE gets a block of memory at the start of matching to use for this purpose. "
"If the call via pcre_malloc() fails, this error is given. The memory is "
"automatically freed at the end of matching. This error is also given if "
"pcre_stack_malloc() fails in pcre_exec(). "
"This can happen only when PCRE has been compiled with "
"--disable-stack-for-recursion (PCRE_ERROR_NOMEMORY)";
case PCRE_ERROR_NOSUBSTRING:
return "This error is used by the pcre_copy_substring(), pcre_get_substring(), "
"and pcre_get_substring_list() functions (see below). "
"It is never returned by pcre_exec() (PCRE_ERROR_NOSUBSTRING)";
case PCRE_ERROR_MATCHLIMIT:
return "The backtracking limit, as specified by the match_limit field in a pcre_extra "
"structure (or defaulted) was reached. "
"See the description above (PCRE_ERROR_MATCHLIMIT)";
case PCRE_ERROR_CALLOUT:
return "This error is never generated by pcre_exec() itself. "
"It is provided for use by callout functions that want to yield a distinctive "
"error code. See the pcrecallout documentation for details (PCRE_ERROR_CALLOUT)";
case PCRE_ERROR_BADUTF8:
return "A string that contains an invalid UTF-8 byte sequence was passed as a subject, "
"and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector "
"(ovecsize) is at least 2, the byte offset to the start of the the invalid UTF-8 "
"character is placed in the first element, and a reason code is placed in the "
"second element. The reason codes are listed in the following section. For "
"backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a truncated "
"UTF-8 character at the end of the subject (reason codes 1 to 5), "
"PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8";
case PCRE_ERROR_BADUTF8_OFFSET:
return "The UTF-8 byte sequence that was passed as a subject was checked and found to "
"be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of "
"startoffset did not point to the beginning of a UTF-8 character or the end of "
"the subject (PCRE_ERROR_BADUTF8_OFFSET)";
case PCRE_ERROR_PARTIAL:
return "The subject string did not match, but it did match partially. See the "
"pcrepartial documentation for details of partial matching (PCRE_ERROR_PARTIAL)";
case PCRE_ERROR_BADPARTIAL:
return "This code is no longer in use. It was formerly returned when the PCRE_PARTIAL "
"option was used with a compiled pattern containing items that were not supported "
"for partial matching. From release 8.00 onwards, there are no restrictions on "
"partial matching (PCRE_ERROR_BADPARTIAL)";
case PCRE_ERROR_INTERNAL:
return "An unexpected internal error has occurred. This error could be caused by a bug "
"in PCRE or by overwriting of the compiled pattern (PCRE_ERROR_INTERNAL)";
case PCRE_ERROR_BADCOUNT:
return "This error is given if the value of the ovecsize argument is negative "
"(PCRE_ERROR_BADCOUNT)";
case PCRE_ERROR_RECURSIONLIMIT:
return "The internal recursion limit, as specified by the match_limit_recursion "
"field in a pcre_extra structure (or defaulted) was reached. "
"See the description above (PCRE_ERROR_RECURSIONLIMIT)";
case PCRE_ERROR_DFA_UITEM:
return "PCRE_ERROR_DFA_UITEM";
case PCRE_ERROR_DFA_UCOND:
return "PCRE_ERROR_DFA_UCOND";
case PCRE_ERROR_DFA_WSSIZE:
return "PCRE_ERROR_DFA_WSSIZE";
case PCRE_ERROR_DFA_RECURSE:
return "PCRE_ERROR_DFA_RECURSE";
case PCRE_ERROR_NULLWSLIMIT:
return "PCRE_ERROR_NULLWSLIMIT";
case PCRE_ERROR_BADNEWLINE:
return "An invalid combination of PCRE_NEWLINE_xxx options was "
"given (PCRE_ERROR_BADNEWLINE)";
case PCRE_ERROR_BADOFFSET:
return "The value of startoffset was negative or greater than the length "
"of the subject, that is, the value in length (PCRE_ERROR_BADOFFSET)";
case PCRE_ERROR_SHORTUTF8:
return "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject "
"string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. "
"Information about the failure is returned as for PCRE_ERROR_BADUTF8. "
"It is in fact sufficient to detect this case, but this special error code for "
"PCRE_PARTIAL_HARD precedes the implementation of returned information; "
"it is retained for backwards compatibility (PCRE_ERROR_SHORTUTF8)";
case PCRE_ERROR_RECURSELOOP:
return "This error is returned when pcre_exec() detects a recursion loop "
"within the pattern. Specifically, it means that either the whole pattern "
"or a subpattern has been called recursively for the second time at the same "
"position in the subject string. Some simple patterns that might do this "
"are detected and faulted at compile time, but more complicated cases, "
"in particular mutual recursions between two different subpatterns, "
"cannot be detected until run time (PCRE_ERROR_RECURSELOOP)";
case PCRE_ERROR_JIT_STACKLIMIT:
return "This error is returned when a pattern that was successfully studied "
"using a JIT compile option is being matched, but the memory available "
"for the just-in-time processing stack is not large enough. See the pcrejit "
"documentation for more details (PCRE_ERROR_JIT_STACKLIMIT)";
case PCRE_ERROR_BADMODE:
return "This error is given if a pattern that was compiled by the 8-bit library "
"is passed to a 16-bit or 32-bit library function, or vice versa (PCRE_ERROR_BADMODE)";
case PCRE_ERROR_BADENDIANNESS:
return "This error is given if a pattern that was compiled and saved is reloaded on a "
"host with different endianness. The utility function pcre_pattern_to_host_byte_order() "
"can be used to convert such a pattern so that it runs on the new host (PCRE_ERROR_BADENDIANNESS)";
case PCRE_ERROR_DFA_BADRESTART:
return "PCRE_ERROR_DFA_BADRESTART";
#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32
case PCRE_ERROR_BADLENGTH:
return "This error is given if pcre_exec() is called with a negative value for the length argument (PCRE_ERROR_BADLENGTH)";
case PCRE_ERROR_JIT_BADOPTION:
return "This error is returned when a pattern that was successfully studied using a JIT compile "
"option is being matched, but the matching mode (partial or complete match) does not correspond "
"to any JIT compilation mode. When the JIT fast path function is used, this error may be "
"also given for invalid options. See the pcrejit documentation for more details (PCRE_ERROR_JIT_BADOPTION)";
#endif
}
return "unknown PCRE error " + std::to_string(pcreExecRet);
}
class PcreRegex : public Regex
{
public:
explicit PcreRegex(std::string pattern)
: mPattern(std::move(pattern))
{}
~PcreRegex() override
{
if (mExtra) {
pcre_free(mExtra);
mExtra = nullptr;
}
if (mRe) {
pcre_free(mRe);
mRe = nullptr;
}
}
std::string compile();
std::string match(const std::string& str, const MatchFn& match) const override;
private:
std::string mPattern;
pcre* mRe{};
pcre_extra* mExtra{};
};
std::string PcreRegex::compile()
{
if (mRe)
return "pcre_compile failed: regular expression has already been compiled";
const char *pcreCompileErrorStr = nullptr;
int erroffset = 0;
pcre * const re = pcre_compile(mPattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
if (!re) {
if (pcreCompileErrorStr)
return "pcre_compile failed: " + std::string(pcreCompileErrorStr);
return "pcre_compile failed: unknown error";
}
// Optimize the regex, but only if PCRE_CONFIG_JIT is available
#ifdef PCRE_CONFIG_JIT
const char *pcreStudyErrorStr = nullptr;
pcre_extra * const pcreExtra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &pcreStudyErrorStr);
// pcre_study() returns NULL for both errors and when it can not optimize the regex.
// The last argument is how one checks for errors.
// It is NULL if everything works, and points to an error string otherwise.
if (pcreStudyErrorStr) {
// pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
pcre_free(re);
return "pcre_study failed: " + std::string(pcreStudyErrorStr);
}
mExtra = pcreExtra;
#endif
mRe = re;
return "";
}
std::string PcreRegex::match(const std::string& str, const MatchFn& match) const
{
if (!mRe)
return "pcre_exec failed: regular expression has not been compiled yet";
int pos = 0;
int ovector[30]= {0};
while (pos < static_cast<int>(str.size())) {
const int pcreExecRet = pcre_exec(mRe, mExtra, str.c_str(), static_cast<int>(str.size()), pos, 0, ovector, 30);
if (pcreExecRet == PCRE_ERROR_NOMATCH)
return "";
if (pcreExecRet < 0) {
return "pcre_exec failed (pos: " + std::to_string(pos) + "): " + pcreErrorCodeToString(pcreExecRet);
}
const auto pos1 = static_cast<unsigned int>(ovector[0]);
const auto pos2 = static_cast<unsigned int>(ovector[1]);
match(pos1, pos2);
// jump to the end of the match for the next pcre_exec
pos = static_cast<int>(pos2);
}
return "";
}
}
std::shared_ptr<Regex> Regex::create(std::string pattern, std::string& err)
{
auto* regex = new PcreRegex(std::move(pattern));
err = regex->compile();
if (!err.empty()) {
delete regex;
return nullptr;
}
return std::shared_ptr<Regex>(regex);
}
#endif // HAVE_RULES
|