File: regexrulefactory.cpp

package info (click to toggle)
source-highlight 3.1.7-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 10,332 kB
  • ctags: 5,233
  • sloc: sh: 11,270; cpp: 10,206; ansic: 9,515; makefile: 1,865; lex: 1,200; yacc: 1,021; php: 213; perl: 211; awk: 98; erlang: 94; lisp: 90; java: 75; ruby: 69; python: 61; asm: 43; ml: 38; ada: 36; haskell: 27; xml: 23; cs: 11; sql: 8; tcl: 6; sed: 4
file content (182 lines) | stat: -rw-r--r-- 5,693 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
//
// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004-2008
//
// Copyright: See COPYING file that comes with this distribution
//

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "regexrulefactory.h"
#include "regexhighlightrule.h"
#include "tostringcollection.h"
#include "highlightstate.h"
#include "regexpreprocessor.h"

namespace srchilite {

/**
 * Wraps a regular expression fragment in a non-marking (non-capturing)
 * group, i.e., produces "(?:" followed by s followed by ")".
 *
 * @param s the expression fragment to wrap
 * @return the fragment enclosed in (?: ... )
 */
static const std::string non_marking_group(const std::string &s) {
    std::string group("(?:");
    group += s;
    group += ')';
    return group;
}

/**
 * Tells whether the passed string denotes exactly one character.
 * A leading backslash is treated as an escape prefix and is not counted:
 * a string starting with a backslash must be exactly two characters long,
 * any other string must be exactly one character long.
 *
 * @param s the string to inspect
 * @return true if s is a single (possibly backslash-escaped) character
 */
static bool is_single_char(const std::string &s) {
    const bool escaped = !s.empty() && s[0] == '\\';
    const std::string::size_type expected = escaped ? 2 : 1;

    return s.size() == expected;
}

/**
 * Constructs the factory. The body is intentionally empty: no
 * initialization is performed here.
 */
RegexRuleFactory::RegexRuleFactory() {
}

/**
 * Destroys the factory. The body is empty: no cleanup is performed here.
 */
RegexRuleFactory::~RegexRuleFactory() {
}

/**
 * Creates a rule for the element name that matches the regular
 * expression s as it is.
 *
 * @param name the name of the element the rule highlights
 * @param s the regular expression string
 * @return a RegexHighlightRule allocated with new
 */
HighlightRule *RegexRuleFactory::createSimpleRule(const std::string &name,
        const std::string &s) {
    HighlightRule *simpleRule = new RegexHighlightRule(name, s);

    return simpleRule;
}

/**
 * Creates a rule that matches any element of the list, but only as a
 * whole word.
 *
 * The elements are joined with '|' and, when caseSensitive is false, the
 * joined expression is passed through RegexPreProcessor::make_nonsensitive.
 *
 * @param name the name of the element the rule highlights
 * @param list the words to match
 * @param caseSensitive whether the words are matched as they are
 * @return a RegexHighlightRule allocated with new
 */
HighlightRule *RegexRuleFactory::createWordListRule(const std::string &name,
        const WordList &list, bool caseSensitive) {
    std::string words = toStringCollection(list, '|');

    if (!caseSensitive) {
        words = RegexPreProcessor::make_nonsensitive(words);
    }

    // \< and \> are word boundaries. They must surround a non-marking
    // group holding all the alternatives: \<(?:class|for|else)\> detects
    // 'for' only in isolation, while (?:\<class|for|else\>) would apply
    // the boundaries to the first and last alternative only.
    std::string pattern("\\<");
    pattern += non_marking_group(words);
    pattern += "\\>";

    return new RegexHighlightRule(name, pattern);
}

/**
 * Creates a rule that matches any element of the list, with no word
 * boundary requirement.
 *
 * The elements are joined with '|' and, when caseSensitive is false, the
 * joined expression is passed through RegexPreProcessor::make_nonsensitive.
 * The result is wrapped in a non-marking group.
 *
 * @param name the name of the element the rule highlights
 * @param list the expressions to match
 * @param caseSensitive whether the expressions are matched as they are
 * @return a RegexHighlightRule allocated with new
 */
HighlightRule *RegexRuleFactory::createListRule(const std::string &name,
        const WordList &list, bool caseSensitive) {
    std::string joined = toStringCollection(list, '|');

    if (!caseSensitive) {
        joined = RegexPreProcessor::make_nonsensitive(joined);
    }

    const std::string pattern = non_marking_group(joined);

    return new RegexHighlightRule(name, pattern);
}

/**
 * Creates a rule for a delimited element that must not span multiple lines.
 *
 * When the rule is not nested and both delimiters are single characters
 * (possibly backslash-escaped), one regular expression of the shape
 *
 *   <startdelim>(everything but delimiters)*<enddelim>
 *
 * is built. For instance, with delimiters "<" and ">" the expression is
 *
 *   <(?:[^<>])*>
 *
 * If an escape expression is given, it is also excluded from the character
 * class and "escape followed by any character" is accepted as an
 * alternative; with a backslash as escape the expression becomes
 *
 *   <(?:[^\\<\\>]|\\.)*>
 *
 * In every other case the request is delegated to createMultiLineRule,
 * since states are needed.
 *
 * @param name the name of the element the rule highlights
 * @param start the start delimiter expression
 * @param end the end delimiter expression
 * @param escape the escape expression (may be empty)
 * @param nested whether the delimited element can be nested
 * @return the rule created by createSimpleRule or createMultiLineRule
 */
HighlightRule *RegexRuleFactory::createLineRule(const std::string &name,
        const std::string &start, const std::string &end,
        const std::string &escape, bool nested) {

    // a single regular expression is possible only for non-nested rules
    // whose delimiters are both single characters
    const bool singleRegex = !nested && is_single_char(start)
            && is_single_char(end);
    if (!singleRegex) {
        return createMultiLineRule(name, start, end, escape, nested);
    }

    const bool hasEscape = !escape.empty();

    // negated character class listing the delimiters (and the escape,
    // which is placed before each delimiter when specified)
    std::string content("[^");
    if (hasEscape) {
        content += escape;
    }
    content += start;
    if (end != start) {
        // the end delimiter is listed only when it differs from the start
        if (hasEscape) {
            content += escape;
        }
        content += end;
    }
    content += "]";

    if (hasEscape) {
        // an escape followed by any character is part of the content
        content += "|";
        content += escape;
        content += ".";
    }

    const std::string exp_string = start + non_marking_group(content) + "*"
            + end;

    return createSimpleRule(name, exp_string);
}

/**
 * Creates a rule for a delimited element by means of states: the returned
 * rule matches the start expression and leads to an inner state, which
 * contains, in this order,
 *  - a rule matching the end expression, which exits one level;
 *  - if escape is not empty, a rule matching the escape expression
 *    followed by any character, so that an escaped end is not taken as
 *    the end of the element;
 *  - if nested is true, a rule matching the start expression again,
 *    marked as nested.
 *
 * @param name the name of the element all the created rules highlight
 * @param start the start delimiter expression
 * @param _end the end delimiter expression; if empty, "\z" is used
 * @param escape the escape expression (may be empty)
 * @param nested whether the delimited element can be nested
 * @return the rule matching the start expression
 */
HighlightRule *RegexRuleFactory::createMultiLineRule(const std::string &name,
        const std::string &start, const std::string &_end,
        const std::string &escape, bool nested) {

    // if end is not specified, then end of buffer is assumed
    const std::string endExp = _end.empty() ? std::string("\\z") : _end;

    // the rule returned to the caller: it matches the start expression
    HighlightRule *opening = createSimpleRule(name, start);

    // matching the end expression leaves the inner state (one level up)
    HighlightRule *closing = createSimpleRule(name, endExp);
    closing->setExitLevel(1);

    // the state entered once the start expression has been matched;
    // its first rule is the one that exits it
    HighlightStatePtr inner(new HighlightState);
    inner->addRule(HighlightRulePtr(closing));

    if (!escape.empty()) {
        // the escape expression followed by one character must be matched
        // as a whole, so that an escaped end expression is skipped
        HighlightRule *escaped = createSimpleRule(name, escape + ".");
        inner->addRule(HighlightRulePtr(escaped));
    }

    if (nested) {
        // the start expression found inside the inner state is handled
        // by a rule marked as nested
        HighlightRule *reopening = createSimpleRule(name, start);
        reopening->setNested(true);
        inner->addRule(HighlightRulePtr(reopening));
    }

    opening->setNextState(inner);

    return opening;
}

/**
 * Creates a rule for the regular expression rep and adds to it every
 * element name of nameList, in the order in which they appear.
 *
 * @param nameList the element names to add to the rule
 * @param rep the regular expression string
 * @return a RegexHighlightRule allocated with new
 */
HighlightRule *RegexRuleFactory::createCompoundRule(
        const ElemNameList &nameList, const std::string &rep) {
    HighlightRule *compound = new RegexHighlightRule(rep);

    ElemNameList::const_iterator current = nameList.begin();
    const ElemNameList::const_iterator last = nameList.end();
    while (current != last) {
        compound->addElem(*current);
        ++current;
    }

    return compound;
}

}