File: scanner.hpp

package info (click to toggle)
openmw 0.49.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 33,992 kB
  • sloc: cpp: 372,479; xml: 2,149; sh: 1,403; python: 797; makefile: 26
file content (322 lines) | stat: -rw-r--r-- 8,133 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
#ifndef COMPILER_SCANNER_H_INCLUDED
#define COMPILER_SCANNER_H_INCLUDED

#include <algorithm>
#include <cctype>
#include <iosfwd>
#include <istream>
#include <iterator>
#include <string>
#include <vector>

#include "tokenloc.hpp"

namespace Compiler
{
    class ErrorHandler;
    class Parser;
    class Extensions;

    /// \brief Scanner
    ///
    /// The Scanner class translates a char-stream into a token stream (delivered
    /// via parser-callbacks).

    /// One UTF-8 encoded character (1 to 4 bytes) read from a character stream.
    ///
    /// The bytes are stored in mData; unused trailing bytes are '\0'. mLength is the
    /// index of the last used byte (0 for a single-byte character) and -1 while the
    /// object is blank.
    class MultiChar
    {
    public:
        /// Create a blank character (all bytes '\0', mLength == -1).
        MultiChar() { blank(); }

        /// Create a character from a single byte.
        ///
        /// Only \a ch is stored; mLength is set to whatever getCharLength() reports
        /// for it.
        explicit MultiChar(const char ch)
        {
            blank();
            mData[0] = ch;

            mLength = getCharLength(ch);
        }

        /// Classify \a ch as the first byte of a UTF-8 sequence.
        ///
        /// \return Number of bytes that follow the first one (0 to 3), or -1 if
        /// \a ch matches none of the recognised lead-byte patterns.
        static int getCharLength(const char ch)
        {
            unsigned char c = ch;
            if (c <= 127)
                return 0;
            else if ((c & 0xE0) == 0xC0)
                return 1;
            else if ((c & 0xF0) == 0xE0)
                return 2;
            else if ((c & 0xF8) == 0xF0)
                return 3;
            else
                return -1;
        }

        /// True if this character consists of exactly the single byte \a ch.
        bool operator==(const char ch) const { return mData[0] == ch && hasNoTrailingBytes(); }

        /// True if all four stored bytes are equal (mLength is not compared).
        bool operator==(const MultiChar& ch) const
        {
            return mData[0] == ch.mData[0] && mData[1] == ch.mData[1] && mData[2] == ch.mData[2]
                && mData[3] == ch.mData[3];
        }

        /// Negation of operator==(const char).
        bool operator!=(const char ch) const { return mData[0] != ch || !hasNoTrailingBytes(); }

        /// True for a single-byte space, tab or comma.
        bool isWhitespace() const
        {
            return (mData[0] == ' ' || mData[0] == '\t' || mData[0] == ',') && hasNoTrailingBytes();
        }

        /// True for a single-byte character accepted by std::isdigit.
        bool isDigit() const
        {
            return std::isdigit(static_cast<unsigned char>(mData[0])) != 0 && hasNoTrailingBytes();
        }

        /// True for '-' and for the three-byte sequence E2 80 93 (UTF-8 en dash).
        bool isMinusSign() const
        {
            if (mData[0] == '-' && hasNoTrailingBytes())
                return true;

            return mData[0] == '\xe2' && mData[1] == '\x80' && mData[2] == '\x93' && mData[3] == 0;
        }

        /// True if the first byte is accepted by std::isalpha or if any trailing byte
        /// is set (i.e. every multi-byte character), except for minus signs.
        bool isAlpha() const
        {
            if (isMinusSign())
                return false;

            return std::isalpha(static_cast<unsigned char>(mData[0])) != 0 || !hasNoTrailingBytes();
        }

        /// Append the bytes 0..mLength to \a str (nothing for a blank character).
        void appendTo(std::string& str) const
        {
            for (int i = 0; i <= mLength; i++)
                str += mData[i];
        }

        /// Put the bytes back into \a in, last byte first, so that they are read
        /// again in their original order.
        void putback(std::istream& in) const
        {
            for (int i = mLength; i >= 0; i--)
                in.putback(mData[i]);
        }

        /// Extract the next character from \a in and store it in this object.
        ///
        /// \return false if the stream is not good, the next byte can not start a
        /// sequence, or the stream ends inside the sequence. The object is blank
        /// after a failure.
        bool getFrom(std::istream& in)
        {
            blank();

            char bytes[4] = {};
            const int length = readSequence(in, bytes);
            if (length < 0)
                return false;

            std::copy_n(bytes, length + 1, mData);
            mLength = length;

            return true;
        }

        /// Read the next character from \a in into this object and seek the stream
        /// back to where it was.
        ///
        /// The previous content is fully replaced on success, so no bytes of a
        /// longer character held before can leak into the comparison functions.
        ///
        /// \return false under the same conditions as getFrom(). On failure the
        /// object keeps its previous value.
        /// \note The stream position is only restored on success; bytes extracted
        /// before a failure inside a sequence stay consumed.
        bool peek(std::istream& in)
        {
            const std::streampos p_orig = in.tellg();

            // Read into a scratch buffer first so that a failure can not leave this
            // object half-overwritten.
            char bytes[4] = {};
            const int length = readSequence(in, bytes);
            if (length < 0)
                return false;

            blank();
            std::copy_n(bytes, length + 1, mData);
            mLength = length;

            in.seekg(p_orig);
            return true;
        }

        /// Reset to the blank state: all bytes '\0', mLength == -1.
        void blank()
        {
            std::fill(std::begin(mData), std::end(mData), '\0');
            mLength = -1;
        }

        /// Return the stored bytes as a string (empty for a blank character).
        std::string data() const
        {
            // NB: mLength is the number of the last element in the array
            return std::string(mData, mLength + 1);
        }

    private:
        /// True if bytes 1..3 are all '\0', i.e. at most the first byte is set.
        bool hasNoTrailingBytes() const { return mData[1] == 0 && mData[2] == 0 && mData[3] == 0; }

        /// Extract one complete sequence from \a in into \a buffer.
        ///
        /// \return Index of the last byte written (0 to 3), or -1 if the stream is
        /// not good, the next byte can not start a sequence, or the stream stops
        /// being good while reading. Bytes extracted before a failure are not put
        /// back.
        static int readSequence(std::istream& in, char (&buffer)[4])
        {
            char ch = static_cast<char>(in.peek());

            if (!in.good())
                return -1;

            const int length = getCharLength(ch);
            if (length < 0)
                return -1;

            for (int i = 0; i <= length; i++)
            {
                in.get(ch);

                if (!in.good())
                    return -1;

                buffer[i] = ch;
            }

            return length;
        }

        char mData[4]{};
        int mLength{};
    };

    /// \brief Scanner
    ///
    /// Reads from an input stream and delivers tokens to a Parser (see scan()).
    /// Tokens can be handed back through the putback* functions. All member
    /// functions are only declared here; they are defined outside of this header.
    class Scanner
    {
        // Tag for the kind of value held in the mPutback* members below.
        // NOTE(review): exact semantics live in the implementation file - confirm there.
        enum putback_type
        {
            Putback_None,
            Putback_Special,
            Putback_Integer,
            Putback_Float,
            Putback_Name,
            Putback_Keyword
        };

        ErrorHandler& mErrorHandler;
        TokenLoc mLoc;
        TokenLoc mPrevLoc;
        std::istream& mStream;
        const Extensions* mExtensions; // optional, the constructor defaults it to nullptr
        putback_type mPutback;
        int mPutbackCode;
        int mPutbackInteger;
        float mPutbackFloat;
        std::string mPutbackName;
        TokenLoc mPutbackLoc;
        // Mode flags; presumably toggled by the enable* functions below - confirm in the implementation.
        bool mStrictKeywords;
        bool mTolerantNames;
        bool mIgnoreNewline;
        bool mExpectName;
        bool mIgnoreSpecial;

    public:
        /// Keyword codes.
        enum keyword
        {
            K_begin,
            K_end,
            K_short,
            K_long,
            K_float,
            K_if,
            K_endif,
            K_else,
            K_elseif,
            K_while,
            K_endwhile,
            K_return,
            K_messagebox,
            K_set,
            K_to
        };

        /// Codes for special (non-name, non-number) tokens.
        enum special
        {
            S_newline,
            S_open,
            S_close,
            S_cmpEQ,
            S_cmpNE,
            S_cmpLT,
            S_cmpLE,
            S_cmpGT,
            S_cmpGE,
            S_plus,
            S_minus,
            S_mult,
            S_div,
            S_ref,
            S_member
        };

    private:
        // The scanner holds references to its error handler and its input stream;
        // copying is disabled.
        Scanner(const Scanner&) = delete;
        Scanner& operator=(const Scanner&) = delete;

        bool get(MultiChar& c);

        void putback(MultiChar& c);

        bool scanToken(Parser& parser);

        bool scanInt(MultiChar& c, Parser& parser, bool& cont);

        bool scanFloat(const std::string& intValue, Parser& parser, bool& cont);

        bool scanName(MultiChar& c, Parser& parser, bool& cont, std::string name = {});

        /// \param name May contain the start of the name (one or more characters)
        bool scanName(std::string& name);

        bool scanSpecial(MultiChar& c, Parser& parser, bool& cont);

        bool isStringCharacter(MultiChar& c, bool lookAhead = true);

    public:
        Scanner(ErrorHandler& errorHandler, std::istream& inputStream, const Extensions* extensions = nullptr);
        ///< constructor

        void scan(Parser& parser);
        ///< Scan a token and deliver it to the parser.

        void putbackSpecial(int code, const TokenLoc& loc);
        ///< put back a special token

        void putbackInt(int value, const TokenLoc& loc);
        ///< put back an integer token

        void putbackFloat(float value, const TokenLoc& loc);
        ///< put back a float token

        void putbackName(const std::string& name, const TokenLoc& loc);
        ///< put back a name token

        void putbackKeyword(int keyword, const TokenLoc& loc);
        ///< put back a keyword token

        void listKeywords(std::vector<std::string>& keywords);
        ///< Append all known keywords to \a keywords.

        /// Treat newline character as a part of script command.
        ///
        /// \attention This mode lasts only until the next keyword is reached.
        void enableIgnoreNewlines();

        /// Do not accept keywords in quotation marks anymore.
        ///
        /// \attention This mode lasts only until the next newline is reached.
        void enableStrictKeywords();

        /// Continue parsing a name when hitting a '.' or a '-'
        ///
        /// \attention This mode lasts only until the next newline is reached.
        void enableTolerantNames();

        /// Treat '.' and '-' as the start of a name.
        ///
        /// \attention This mode lasts only until the next newline is reached or the call to scan ends.
        void enableExpectName();
    };
}

#endif