//
// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004-2008
//
// Copyright: See COPYING file that comes with this distribution
//
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "regexrulefactory.h"
#include "regexhighlightrule.h"
#include "tostringcollection.h"
#include "highlightstate.h"
#include "regexpreprocessor.h"
namespace srchilite {
/**
 * Wraps a regular expression fragment in a non-marking group,
 * i.e., builds the string (?:s).
 *
 * The result is returned as a plain (non-const) value so that callers
 * can move from it instead of being forced to copy.
 *
 * @param s the expression fragment to wrap
 * @return s enclosed between "(?:" and ")"
 */
static std::string non_marking_group(const std::string &s) {
    return "(?:" + s + ")";
}
/**
 * Tells whether the passed string denotes a single character.
 *
 * A string starting with a backslash must be exactly two characters long
 * (the backslash plus the escaped character); any other string must be
 * exactly one character long.
 *
 * @param s the string to inspect
 * @return true if s has one of the two shapes described above
 */
static bool is_single_char(const std::string &s) {
    // a leading backslash is not counted as a character of its own
    const bool escaped = !s.empty() && s[0] == '\\';
    const std::string::size_type expected_length = escaped ? 2 : 1;
    return s.size() == expected_length;
}
/**
 * Default constructor; the body is empty, so nothing is set up here.
 */
RegexRuleFactory::RegexRuleFactory() {
}
/**
 * Destructor; the body is empty, so nothing is released here.
 */
RegexRuleFactory::~RegexRuleFactory() {
}
/**
 * Creates a rule by forwarding the element name and the expression
 * string to the RegexHighlightRule constructor.
 *
 * @param name the element name handed to the rule
 * @param s the expression string handed to the rule
 * @return a newly allocated RegexHighlightRule
 */
HighlightRule *RegexRuleFactory::createSimpleRule(const std::string &name,
        const std::string &s) {
    HighlightRule *rule = new RegexHighlightRule(name, s);
    return rule;
}
/**
 * Creates a rule matching any of the words in the list, each one only
 * when delimited by word boundaries.
 *
 * @param name the element name handed to the rule
 * @param list the words, joined with '|' to form the alternatives
 * @param caseSensitive if false, the alternatives are first passed
 * through RegexPreProcessor::make_nonsensitive
 * @return a newly allocated RegexHighlightRule
 */
HighlightRule *RegexRuleFactory::createWordListRule(const std::string &name,
        const WordList &list, bool caseSensitive) {
    std::string words = toStringCollection(list, '|');
    if (!caseSensitive) {
        words = RegexPreProcessor::make_nonsensitive(words);
    }

    // \< and \> are word boundaries. The alternatives must be enclosed
    // in a non-marking group, otherwise the boundaries would bind only to
    // the first and to the last alternative. For instance,
    //   \<(?:class|for|else)\>
    // correctly detects 'for' only in isolation, while
    //   (?:\<class|for|else\>)
    // does not.
    std::string pattern("\\<");
    pattern += non_marking_group(words);
    pattern += "\\>";

    return new RegexHighlightRule(name, pattern);
}
/**
 * Creates a rule matching any of the elements of the list; unlike
 * createWordListRule, no word boundaries are added around the
 * alternatives.
 *
 * @param name the element name handed to the rule
 * @param list the elements, joined with '|' to form the alternatives
 * @param caseSensitive if false, the alternatives are first passed
 * through RegexPreProcessor::make_nonsensitive
 * @return a newly allocated RegexHighlightRule
 */
HighlightRule *RegexRuleFactory::createListRule(const std::string &name,
        const WordList &list, bool caseSensitive) {
    std::string joined = toStringCollection(list, '|');

    if (caseSensitive) {
        // the alternatives are used exactly as they were joined
        return new RegexHighlightRule(name, non_marking_group(joined));
    }

    std::string insensitive = RegexPreProcessor::make_nonsensitive(joined);
    return new RegexHighlightRule(name, non_marking_group(insensitive));
}
/**
 * Creates a rule for an element delimited by start and end.
 *
 * When the rule is not nested and both delimiters are single characters
 * (see is_single_char), one regular expression of the shape
 *   <startdelim>(everything but delimiters)*<enddelim>
 * is built. For instance, for the delimiters "<" and ">" the expression is
 *   <(?:[^<>])*>
 * and, if the backslash is given as escape sequence, it is
 *   <(?:[^\\<\\>]|\\.)*>
 * In every other case the work is delegated to createMultiLineRule.
 *
 * @param name the element name handed to the rule
 * @param start the expression for the start delimiter
 * @param end the expression for the end delimiter
 * @param escape the escape sequence (may be empty)
 * @param nested whether the delimited element can be nested
 * @return the rule built by createSimpleRule or by createMultiLineRule
 */
HighlightRule *RegexRuleFactory::createLineRule(const std::string &name,
        const std::string &start, const std::string &end,
        const std::string &escape, bool nested) {
    // a regexp can be built directly only for single char delimiters;
    // otherwise states are needed, i.e., a multi line rule
    if (nested || !(is_single_char(start) && is_single_char(end))) {
        return createMultiLineRule(name, start, end, escape, nested);
    }

    const bool hasEscape = !escape.empty();

    // character class excluding the delimiters (and the escape, if any)
    std::string body = "[^";
    if (hasEscape) {
        body += escape;
    }
    body += start;
    if (end != start) {
        // the end delimiter is listed only when it differs from the start
        if (hasEscape) {
            body += escape;
        }
        body += end;
    }
    body += "]";

    if (hasEscape) {
        // alternatively, accept the escape followed by any character
        body += "|";
        body += escape;
        body += ".";
    }

    return createSimpleRule(name, start + non_marking_group(body) + "*" + end);
}
/**
 * Creates a rule for a delimited element by means of an inner state:
 * the returned rule matches the start expression and has, as next state,
 * a state containing the rule that matches the end expression (which
 * exits one level), an optional rule for the escape sequence and, if
 * requested, a nested rule matching the start expression again.
 *
 * @param name the element name handed to every created rule
 * @param start the expression for the start delimiter
 * @param _end the expression for the end delimiter; if empty, "\\z"
 * (end of buffer) is used instead
 * @param escape the escape sequence (may be empty)
 * @param nested whether the start expression must be matched also
 * inside the inner state, as a nested rule
 * @return the rule matching the start expression
 */
HighlightRule *RegexRuleFactory::createMultiLineRule(const std::string &name,
        const std::string &start, const std::string &_end,
        const std::string &escape, bool nested) {
    // an unspecified end means that the end of the buffer is assumed
    const std::string end = _end.empty() ? std::string("\\z") : _end;

    // the rule that enters the inner state
    HighlightRule *opening = createSimpleRule(name, start);

    // the rule that leaves the inner state: matching the end exits one level
    HighlightRule *closing = createSimpleRule(name, end);
    closing->setExitLevel(1);

    // the state entered when the start expression is matched
    HighlightStatePtr inner(new HighlightState);
    inner->addRule(HighlightRulePtr(closing));

    if (!escape.empty()) {
        // the end must not be matched when preceded by the escape sequence,
        // thus we add a rule matching the escape followed by one character
        inner->addRule(HighlightRulePtr(createSimpleRule(name, escape + ".")));
    }

    if (nested) {
        // within the inner state the start expression is matched again
        // by a rule marked as nested
        HighlightRule *reopening = createSimpleRule(name, start);
        reopening->setNested(true);
        inner->addRule(HighlightRulePtr(reopening));
    }

    opening->setNextState(inner);
    return opening;
}
/**
 * Creates a rule from a single expression string and registers on it,
 * in order, all the element names of the passed list.
 *
 * @param nameList the element names added to the rule via addElem
 * @param rep the expression string handed to the RegexHighlightRule
 * @return a newly allocated RegexHighlightRule
 */
HighlightRule *RegexRuleFactory::createCompoundRule(
        const ElemNameList &nameList, const std::string &rep) {
    HighlightRule *compound = new RegexHighlightRule(rep);

    ElemNameList::const_iterator elem = nameList.begin();
    while (elem != nameList.end()) {
        compound->addElem(*elem);
        ++elem;
    }

    return compound;
}
}