File: RegExp.h

package info (click to toggle)
webkit2gtk 2.48.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 429,764 kB
  • sloc: cpp: 3,697,587; javascript: 194,444; ansic: 169,997; python: 46,499; asm: 19,295; ruby: 18,528; perl: 16,602; xml: 4,650; yacc: 2,360; sh: 2,098; java: 1,993; lex: 1,327; pascal: 366; makefile: 298
file content (244 lines) | stat: -rw-r--r-- 8,490 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2007-2022 Apple Inc. All rights reserved.
 *  Copyright (C) 2009 Torch Mobile, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

#pragma once

#include "ConcurrentJSLock.h"
#include "MatchResult.h"
#include "RegExpKey.h"
#include "Structure.h"
#include "Yarr.h"
#include <wtf/Forward.h>
#include <wtf/text/WTFString.h>

#if ENABLE(YARR_JIT)
#include "YarrJIT.h"
#endif

namespace JSC {

struct RegExpRepresentation;
class VM;

// JSCell subclass that holds a regular expression: its pattern string, its
// Yarr flags, and the compiled representations (Yarr bytecode and, when
// ENABLE(YARR_JIT) is set, a Yarr JIT code block). Named-capture bookkeeping
// lives in the optional RareData.
class RegExp final : public JSCell {
    friend class CachedRegExp;

public:
    using Base = JSCell;
    static constexpr unsigned StructureFlags = Base::StructureFlags | StructureIsImmortal;
    static constexpr DestructionMode needsDestruction = NeedsDestruction;

    // All RegExp cells are allocated out of the VM's dedicated regExpSpace.
    template<typename CellType, SubspaceAccess mode>
    static GCClient::IsoSubspace* subspaceFor(VM& vm)
    {
        return &vm.regExpSpace();
    }

    JS_EXPORT_PRIVATE static RegExp* create(VM&, const String& pattern, OptionSet<Yarr::Flags>);
    static void destroy(JSCell*);
    static size_t estimatedSize(JSCell*, VM&);
    JS_EXPORT_PRIVATE static void dumpToStream(const JSCell*, PrintStream&);
    void dumpSimpleName(PrintStream&) const;

    // The full flag set this RegExp was constructed with.
    OptionSet<Yarr::Flags> flags() const { return m_flags; }
    // Expands to one `bool <flag>() const` accessor per entry of JSC_REGEXP_FLAGS,
    // each testing m_flags for the corresponding Yarr::Flags value.
#define JSC_DEFINE_REGEXP_FLAG_ACCESSOR(key, name, lowerCaseName, index) bool lowerCaseName() const { return m_flags.contains(Yarr::Flags::name); }
    JSC_REGEXP_FLAGS(JSC_DEFINE_REGEXP_FLAG_ACCESSOR)
#undef JSC_DEFINE_REGEXP_FLAG_ACCESSOR
    bool globalOrSticky() const { return global() || sticky(); }
    bool eitherUnicode() const { return unicode() || unicodeSets(); }

    const String& pattern() const { return m_patternString; }

    // Construction-time error reporting, all derived from m_constructionErrorCode.
    bool isValid() const { return !Yarr::hasError(m_constructionErrorCode); }
    ASCIILiteral errorMessage() const { return Yarr::errorMessage(m_constructionErrorCode); }
    // Only reads m_constructionErrorCode, hence const like errorMessage().
    JSObject* errorToThrow(JSGlobalObject* globalObject) const { return Yarr::errorToThrow(globalObject, m_constructionErrorCode); }

    // Marks the RegExp as not compiled and clears any recorded construction error.
    // Note that this only resets the two state fields; it does not touch the
    // bytecode / JIT code members.
    void reset()
    {
        m_state = NotCompiled;
        m_constructionErrorCode = Yarr::ErrorCode::NoError;
    }

    JS_EXPORT_PRIVATE int match(JSGlobalObject*, StringView, unsigned startOffset, Vector<int>& ovector);

    // Returns false if we couldn't run the regular expression for any reason.
    bool matchConcurrently(VM&, StringView, unsigned startOffset, int& position, Vector<int>& ovector);

    JS_EXPORT_PRIVATE MatchResult match(JSGlobalObject*, StringView, unsigned startOffset);

    bool matchConcurrently(VM&, StringView, unsigned startOffset, MatchResult&);

    // Call these versions of the match functions if you're desperate for performance.
    template<typename VectorType, Yarr::MatchFrom thread = Yarr::MatchFrom::VMThread>
    int matchInline(JSGlobalObject* nullOrGlobalObject, VM&, StringView, unsigned startOffset, VectorType& ovector);
    template<Yarr::MatchFrom thread = Yarr::MatchFrom::VMThread>
    MatchResult matchInline(JSGlobalObject* nullOrGlobalObject, VM&, StringView, unsigned startOffset);

    unsigned numSubpatterns() const { return m_numSubpatterns; }

    // Index in the offset vector just past the regular (start, end) pairs:
    // two slots for the whole match plus two per subpattern.
    unsigned offsetVectorBaseForNamedCaptures() const
    {
        return (numSubpatterns() + 1) * 2;
    }

    // Total number of offset-vector slots: the base region, plus one extra slot
    // per duplicate named capture group when the pattern has named captures.
    int offsetVectorSize() const
    {
        if (!hasNamedCaptures())
            return offsetVectorBaseForNamedCaptures();
        return offsetVectorBaseForNamedCaptures() + m_rareData->m_numDuplicateNamedCaptureGroups;
    }

    // True when rare data exists and records at least one capture group name.
    bool hasNamedCaptures() const
    {
        return m_rareData && !m_rareData->m_captureGroupNames.isEmpty();
    }

    // Returns the recorded name for subpattern `i`, or a null String when `i` is 0
    // or no capture group names have been recorded.
    String getCaptureGroupNameForSubpatternId(unsigned i) const
    {
        // The guard already establishes that m_rareData is non-null below.
        if (!i || !m_rareData || m_rareData->m_captureGroupNames.isEmpty())
            return String();
        return m_rareData->m_captureGroupNames[i];
    }

WTF_ALLOW_UNSAFE_BUFFER_USAGE_BEGIN
    // Resolves a capture group name to a subpattern id; 0 means "not found"
    // (no rare data, or the name is not registered).
    // When the name maps to a single entry, that entry is the subpattern id.
    // Otherwise (duplicate named groups) the first entry is the named capture
    // group id and the id is read from `ovector`, in the region that starts at
    // offsetVectorBaseForNamedCaptures().
    template <typename Offsets>
    unsigned subpatternIdForGroupName(StringView groupName, const Offsets ovector) const
    {
        if (!m_rareData)
            return 0;
        auto it = m_rareData->m_namedGroupToParenIndices.find<StringViewHashTranslator>(groupName);
        if (it == m_rareData->m_namedGroupToParenIndices.end())
            return 0;
        if (it->value.size() == 1)
            return it->value[0];

        return ovector[offsetVectorBaseForNamedCaptures() + it->value[0] - 1];
    }
WTF_ALLOW_UNSAFE_BUFFER_USAGE_END

    // True when the current state is JITCode or ByteCode. Read-only, hence const.
    bool hasCode() const
    {
        return m_state == JITCode || m_state == ByteCode;
    }

    bool hasCodeFor(Yarr::CharSize);
    bool hasMatchOnlyCodeFor(Yarr::CharSize);

    void deleteCode();

#if ENABLE(REGEXP_TRACING)
    constexpr static unsigned SameLineFormatedRegExpnWidth = 74;
    static void printTraceHeader();
    void printTraceData();
#endif

    inline static Structure* createStructure(VM&, JSGlobalObject*, JSValue);

    DECLARE_INFO;

    // Key built from the flags and the pattern string.
    RegExpKey key() { return RegExpKey(m_flags, m_patternString); }

    String escapedPattern() const;

    String toSourceString() const;

#if ENABLE(YARR_JIT)
    // Returns the JIT code block only while the state is JITCode; nullptr otherwise.
    Yarr::YarrCodeBlock* getRegExpJITCodeBlock()
    {
        if (m_state != JITCode)
            return nullptr;

        return m_regExpJITCode.get();
    }
#endif

    // Accessors for m_atom; it counts as valid whenever the string is non-null.
    bool hasValidAtom() const { return !m_atom.isNull(); }
    const String& atom() const { return m_atom; }
    void setAtom(String&& atom) { m_atom = WTFMove(atom); }

private:
    friend class RegExpCache;
    RegExp(VM&, const String&, OptionSet<Yarr::Flags>);
    void finishCreation(VM&);

    static RegExp* createWithoutCaching(VM&, const String&, OptionSet<Yarr::Flags>);

    // Values stored in m_state; hasCode() treats JITCode and ByteCode as "has code".
    enum RegExpState : uint8_t {
        ParseError,
        JITCode,
        ByteCode,
        NotCompiled
    };

    void byteCodeCompileIfNecessary(VM*);

    void compile(VM*, Yarr::CharSize, std::optional<StringView> sampleString);
    void compileIfNecessary(VM&, Yarr::CharSize, std::optional<StringView> sampleString);

    void compileMatchOnly(VM*, Yarr::CharSize, std::optional<StringView> sampleString);
    void compileIfNecessaryMatchOnly(VM&, Yarr::CharSize, std::optional<StringView> sampleString);

#if ENABLE(YARR_JIT_DEBUG)
    void matchCompareWithInterpreter(StringView, int startOffset, int* offsetVector, int jitResult);
#endif

#if ENABLE(YARR_JIT)
    // Allocates the JIT code block on first use and returns a reference to it.
    Yarr::YarrCodeBlock& ensureRegExpJITCode()
    {
        if (!m_regExpJITCode)
            m_regExpJITCode = makeUnique<Yarr::YarrCodeBlock>(this);
        return *m_regExpJITCode.get();
    }
#endif

    // Named-capture data, held behind m_rareData and null-checked by every reader.
    struct RareData {
        WTF_MAKE_STRUCT_FAST_ALLOCATED;
        // Explicitly zeroed so offsetVectorSize() never reads an indeterminate
        // value, regardless of how the RareData instance was constructed.
        unsigned m_numDuplicateNamedCaptureGroups { 0 };
        Vector<String> m_captureGroupNames;

        // The first element of the RHS vector is the subpatternId in the non-duplicate case.
        // For the duplicate case, the first element is the namedCaptureGroupId.
        // The remaining elements are the subpatternIds for each of the duplicate groups.
        UncheckedKeyHashMap<String, Vector<unsigned>> m_namedGroupToParenIndices;
    };

    String m_patternString;
    String m_atom;
    RegExpState m_state { NotCompiled };
    OptionSet<Yarr::Flags> m_flags;
    Yarr::ErrorCode m_constructionErrorCode { Yarr::ErrorCode::NoError };
    unsigned m_numSubpatterns { 0 };
    std::unique_ptr<Yarr::BytecodePattern> m_regExpBytecode;
#if ENABLE(YARR_JIT)
    std::unique_ptr<Yarr::YarrCodeBlock> m_regExpJITCode;
#endif
    std::unique_ptr<RareData> m_rareData;
#if ENABLE(REGEXP_TRACING)
    // Tracing counters, only present in REGEXP_TRACING builds.
    double m_rtMatchOnlyTotalSubjectStringLen { 0.0 };
    double m_rtMatchTotalSubjectStringLen { 0.0 };
    unsigned m_rtMatchOnlyCallCount { 0 };
    unsigned m_rtMatchOnlyFoundCount { 0 };
    unsigned m_rtMatchCallCount { 0 };
    unsigned m_rtMatchFoundCount { 0 };
#endif
};

} // namespace JSC