1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
|
<?php
/**
* Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
* For an intro to the Lexer see:
* https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
*
* @author Marcus Baker http://www.lastcraft.com
*/
namespace dokuwiki\Parsing\Lexer;
/**
 * Compounded regular expression.
 *
 * Any of the contained patterns could match and when one does its label is returned.
 *
 * Every added pattern becomes one capture group of a single alternation
 * ("(p1)|(p2)|..."). Parentheses written in a pattern are escaped unless they
 * belong to a "(?...)" construct, so the only capture groups are the ones added
 * here. PCRE drops trailing unmatched groups from the match array, which means
 * the last group present in the result identifies the pattern that matched.
 */
class ParallelRegex
{
    /** @var string[] patterns to match, kept exactly as passed to addPattern() */
    protected $patterns = [];
    /** @var array labels for above patterns (same indexes) */
    protected $labels = [];
    /** @var string|null the compound regex matching all patterns, null until (re)built */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case True for case sensitive, false
     *                      for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Invalidates the cached compound regex so it is rebuilt on next use.
     *
     * @param mixed $pattern Perl style regex. Must be UTF-8
     *                       encoded. If it is a string, the (, )
     *                       lose their meaning unless they
     *                       form part of a lookahead or
     *                       lookbehind assertion.
     * @param bool|string $label Label of regex to be returned
     *                           on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $this->patterns[] = $pattern;
        $this->labels[] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject String to match against.
     * @param string $match First matched portion of
     *                      subject. Empty string when nothing matched.
     * @return bool|string False if no match found, label if label exists, true if not
     */
    public function apply($subject, &$match)
    {
        // always hand back a defined value, also when there is nothing to match with
        $match = "";
        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            return false;
        }
        $match = $matches[0];

        // The last group present belongs to the pattern that matched. Looking at
        // the index instead of the group's content keeps matches such as "0"
        // (falsy in PHP) from being mistaken for "no match".
        return $this->labels[count($matches) - 2] ?? true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * @param string $subject String to match against.
     * @param array $split The split result: array containing, pre-match, match & post-match strings.
     *                     When nothing matched it is [$subject, "", ""].
     * @return bool|string False if no match found, label if label exists, true if not
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            $split = [$subject, "", ""];
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches, PREG_OFFSET_CAPTURE)) {
            $this->reportPregError();
            $split = [$subject, "", ""];
            return false;
        }

        // Cut the subject at the byte offset of the match that was just found,
        // rather than running the single pattern a second time over the subject.
        [$match, $offset] = $matches[0];
        $split = [
            substr($subject, 0, $offset),
            $match,
            substr($subject, $offset + strlen($match)),
        ];

        // the last group present belongs to the pattern that matched
        return $this->labels[count($matches) - 2] ?? true;
    }

    /**
     * Emits a message for the PCRE error of the last preg_* call, if there was one.
     *
     * A plain "no match" leaves preg_last_error() at PREG_NO_ERROR and reports nothing.
     */
    protected function reportPregError()
    {
        switch (preg_last_error()) {
            case PREG_BACKTRACK_LIMIT_ERROR:
                msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                break;
            case PREG_RECURSION_LIMIT_ERROR:
                msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                break;
            case PREG_BAD_UTF8_ERROR:
                msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                break;
            case PREG_INTERNAL_ERROR:
                msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                break;
        }
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The stored patterns are left untouched, so rebuilding the regex after a
     * later addPattern() call does not escape earlier patterns a second time.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $groups = [];
            foreach ($this->patterns as $pattern) {
                $groups[] = '(' . $this->escapePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $groups) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single pattern for use inside the compound regex.
     *
     * Plain "(" and ")" become literal characters, "(?...)" constructs keep their
     * meaning and "/" (the delimiter of the compound regex) is escaped.
     *
     * @param string $pattern pattern as passed to addPattern()
     * @return string the escaped pattern, without enclosing group
     */
    protected function escapePattern($pattern)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all(
            '/\\\\.|' .
            '\(\?|' .
            '[()]|' .
            '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
            '[^[()\\\\]+/',
            $pattern,
            $elts
        );

        $escaped = "";
        // One entry per currently open parenthesis: true for "(?", false for a
        // literal "(". A ")" then closes whatever was opened last, so literal
        // parentheses nested inside a "(?...)" group are paired up correctly.
        $open = [];
        foreach ($elts[0] as $elt) {
            if ($elt === '(?') {
                $open[] = true;
                $escaped .= '(?';
            } elseif ($elt === '(') {
                $open[] = false;
                $escaped .= '\(';
            } elseif ($elt === ')') {
                // an unbalanced ")" (nothing open) is treated as a literal too
                $escaped .= array_pop($open) ? ')' : '\)';
            } elseif (str_starts_with($elt, '\\')) {
                // already an escape sequence, keep as is
                $escaped .= $elt;
            } else {
                $escaped .= str_replace('/', '\/', $elt);
            }
        }
        return $escaped;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}
|