File: regcomp.cc

package info (click to toggle)
re2c 4.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 51,512 kB
  • sloc: cpp: 34,160; ml: 8,494; sh: 5,311; makefile: 1,014; haskell: 611; python: 431; ansic: 234; javascript: 113
file content (147 lines) | stat: -rw-r--r-- 4,549 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <valarray>
#include <vector>

#include "lib/lex.h"
#include "lib/regcomp_dfa_multipass.h"
#include "lib/regex.h"
#include "lib/regex_impl.h"
#include "lib/regoff_trie.h"
#include "src/dfa/dfa.h"
#include "src/msg/location.h"
#include "src/msg/msg.h"
#include "src/nfa/nfa.h"
#include "src/options/opt.h"
#include "src/parse/ast.h"
#include "src/regexp/regexp.h"
#include "src/regexp/rule.h"
#include "src/util/check.h"
#include "src/util/range.h"

namespace re2c {

int lex(RE2C_LIB_STYPE* yylval, const char* pattern);
const AstNode* regexp;

} // namespace re2c

using namespace re2c;
using namespace re2c::libre2c;

// redefine CHECK_RET, as it needs to return `int` rather than `Ret`
#undef CHECK_RET
#define CHECK_RET(x) do { if (x != Ret::OK) return 1; } while(0)

int regcomp(regex_t* preg, const char* pattern, int cflags) {
    AstAllocator ast_alc;
    OutAllocator out_alc;

    Msg msg;
    Opt opts(out_alc, msg);

    conopt_t& globopts = const_cast<conopt_t&>(opts.global());
    globopts.set_target(re2c::Target::CODE);
    globopts.set_flex_syntax(true);
    globopts.set_nested_negative_tags(!(cflags & (REG_NFA | REG_MULTIPASS)));

    opts.set_supported_api_styles({"functions"});
    opts.set_supported_code_models({"goto-label"});
    opts.init_tags_history((cflags & REG_SUBHIST) != 0);
    opts.init_tags_automatic((cflags & REG_AUTOTAGS) != 0);
    opts.init_captures(true);
    opts.init_captures_posix((cflags & REG_LEFTMOST) == 0);
    opts.init_captures_array(true);

    CHECK_RET(opts.fix_global_and_defaults());

    const opt_t* opt;
    CHECK_RET(opts.snapshot(&opt));

    preg->flags = cflags;

    Ast ast(ast_alc, out_alc);
    const AstNode* a = parse(pattern, ast);

    std::vector<AstRule> arv{AstRule{a, ast.sem_act(NOWHERE, nullptr, nullptr, false)}};
    RESpec re(opt, msg);
    CHECK_RET(re.init(arv));

    find_fixed_tags(re);
    insert_default_tags(re);

    if ((cflags & REG_NFA) == 0) {
        preg->char2class = new size_t[257]; // ASCII + end-of-input $ symbol
        split_charset(re);
        for (uint32_t i = 1, j = 0; i < re.charset.size(); ++i) {
            for (; j < re.charset[i]; ++j) {
                preg->char2class[j] = i - 1;
            }
        }
    }

    Tnfa* nfa = new Tnfa;
    CHECK_RET(re_to_nfa(*nfa, std::move(re), msg));

    DCHECK(nfa->rules.size() == 1);
    preg->re_nsub = nfa->rules[0].ncap + 1;
    preg->re_ntag = nfa->tags.size();

    if (cflags & REG_NFA) {
        preg->nfa = nfa;
        if ((cflags & REG_TRIE) && (cflags & REG_LEFTMOST)) {
            preg->simctx = new lzsimctx_t(*nfa, preg->re_nsub, cflags);
        } else if (cflags & REG_TRIE) {
            preg->simctx = new pzsimctx_t(*nfa, preg->re_nsub, cflags);
        } else if (cflags & REG_LEFTMOST) {
            preg->simctx = new lsimctx_t(*nfa, preg->re_nsub, cflags);
        } else {
            preg->simctx = new psimctx_t(*nfa, preg->re_nsub, cflags);
        }
    } else if (cflags & REG_MULTIPASS) {
        preg->mptdfa = new MpTdfa(std::move(*nfa), opt, cflags);
        delete nfa;
        opt = nullptr; // transfer options ownership to RLDFA

        if (cflags & REG_SUBHIST) {
            preg->regtrie = new regoff_trie_t(preg->mptdfa->tags.size());
        }
    } else {
        Tdfa* dfa = new Tdfa(*new DfaAllocator(), nfa->charset.size(), Rule::NONE);
        CHECK_RET(determinization(std::move(*nfa), *dfa, opt, msg, ""));
        preg->dfa = dfa;
        delete nfa;

        cutoff_dead_rules(*dfa, opt, "", msg);
        insert_fallback_tags(*dfa);
        compact_and_optimize_tags(opt, *dfa);

        if (cflags & REG_TSTRING) {
            // T-string does not need intermediate storage for tag values.
        } else if (cflags & REG_SUBHIST) {
            preg->regtrie = new regoff_trie_t(static_cast<size_t>(dfa->maxtagver + 1));
        } else {
            preg->regs = new regoff_t[dfa->maxtagver + 1];
        }
    }

    if (cflags & REG_TSTRING) {
        // T-string is stored in the regexp (reallocated on each regtstring() call if needed), and
        // the user gets an immutable view of it (const ref).
        tstring_t& ts = preg->tstring;
        ts.capacity = 256;
        ts.string = new tchar_t[ts.capacity];
        ts.length = 0;
    } else if (cflags & REG_SUBHIST) {
        // Allocated on every call to regparse() and returned to the user.
    } else {
        // Allocated by the user.
    }

    delete opt;
    return 0;
}

#undef CHECK_RET