# cython: language_level=3str
# cython: auto_pickle=False
"""
Python Lexical Analyser
Scanning an input stream
"""
from __future__ import absolute_import
import cython
cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)  # noqa:E402
from . import Errors
from .Regexps import BOL, EOL, EOF
NOT_FOUND = object()
class Scanner(object):
    """
    A Scanner is used to read tokens from a stream of characters
    using the token set specified by a Plex.Lexicon.

    Constructor:

      Scanner(lexicon, stream, name = '')

        See the docstring of the __init__ method for details.

    Methods:

      See the docstrings of the individual methods for more
      information.

      read() --> (value, text)
        Reads the next lexical token from the stream.

      position() --> (name, line, col)
        Returns the position of the last token read using the
        read() method.

      begin(state_name)
        Causes scanner to change state.

      produce(value [, text])
        Causes return of a token value to the caller of the
        Scanner.
    """

    #  lexicon = None        # Lexicon
    #  stream = None         # file-like object
    #  name = ''
    #  buffer = ''
    #
    #  These positions are used by the scanner to track its internal state:
    #  buf_start_pos = 0     # position in input of start of buffer
    #  next_pos = 0          # position in input of next char to read
    #  cur_pos = 0           # position in input of current char
    #  cur_line = 1          # line number of current char
    #  cur_line_start = 0    # position in input of start of current line
    #  start_pos = 0         # position in input of start of token
    #  current_scanner_position_tuple = ("", 0, 0)
    #        tuple of filename, line number and position in line, really mainly for error reporting
    #
    #  These positions are used to track what was read from the queue
    #   (which may differ from the internal state when tokens are replaced onto the queue)
    #  last_token_position_tuple = ("", 0, 0)  # tuple of filename, line number and position in line
    #  text = None           # text of last token read
    #  initial_state = None  # Node
    #  state_name = ''       # Name of initial state
    #  queue = None          # list of tokens and positions to be returned
    #  trace = 0
    #
    #  input_state drives the pseudo-character sequence produced by next_char():
    #    1 = reading ordinary characters from the buffer/stream
    #    2 = EOL was delivered for a '\n'; the '\n' itself is delivered next
    #    3 = '\n' was delivered; BOL is delivered next and a new line starts
    #    4 = EOL was delivered because the input is exhausted; EOF comes next
    #    5 = EOF was delivered; only u'' is delivered from now on

    def __init__(self, lexicon, stream, name='', initial_pos=None):
        """
        Scanner(lexicon, stream, name = '')

          |lexicon| is a Plex.Lexicon instance specifying the lexical tokens
          to be recognised.

          |stream| can be a file object or anything which implements a
          compatible read() method.

          |name| is optional, and may be the name of the file being
          scanned or any other identifying string.

          |initial_pos| is optional.  When given, its items [1] and [2]
          seed the line counter and the line start offset (stored negated),
          so that reported lines/columns continue from that position.
        """
        self.trace = 0

        self.buffer = u''
        self.buf_start_pos = 0
        self.next_pos = 0
        self.cur_pos = 0
        self.cur_line = 1
        self.start_pos = 0
        self.current_scanner_position_tuple = ("", 0, 0)
        self.last_token_position_tuple = ("", 0, 0)
        self.text = None
        self.state_name = None

        self.lexicon = lexicon
        self.stream = stream
        self.name = name
        self.queue = []
        self.initial_state = None
        self.begin('')
        self.next_pos = 0
        self.cur_pos = 0
        self.cur_line_start = 0
        # Scanning starts with the beginning-of-line pseudo character.
        self.cur_char = BOL
        self.input_state = 1
        if initial_pos is not None:
            self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]

    def read(self):
        """
        Read the next lexical token from the stream and return a
        tuple (value, text), where |value| is the value associated with
        the token as specified by the Lexicon, and |text| is the actual
        string read from the stream. Returns (None, '') on end of file.
        """
        queue = self.queue
        # Keep scanning until some action (or end of file) queued a token;
        # actions that return None and produce nothing are simply skipped.
        while not queue:
            self.text, action = self.scan_a_token()
            if action is None:
                self.produce(None)
                self.eof()
            else:
                value = action.perform(self, self.text)
                if value is not None:
                    self.produce(value)
        result, self.last_token_position_tuple = queue.pop(0)
        return result

    def unread(self, token, value, position):
        """
        Push the pair (token, value) together with its |position| back onto
        the front of the queue, so that the next read() returns it.
        """
        self.queue.insert(0, ((token, value), position))

    def get_current_scan_pos(self):
        """
        Return the (name, line, col) tuple recorded at the start of the most
        recent scan_a_token() call.
        """
        # distinct from the position of the last token due to the queue
        return self.current_scanner_position_tuple

    def scan_a_token(self):
        """
        Read the next input sequence recognised by the machine
        and return (text, action). Returns ('', None) on end of
        file.

        Raises Errors.UnrecognizedInput if no action matches and the
        scanner is not at end of file.
        """
        self.start_pos = self.cur_pos
        self.current_scanner_position_tuple = (
            self.name, self.cur_line, self.cur_pos - self.cur_line_start
        )
        action = self.run_machine_inlined()
        if action is not None:
            if self.trace:
                print("Scanner: read: Performing %s %d:%d" % (
                    action, self.start_pos, self.cur_pos))
            # Positions are absolute; the buffer only holds input from
            # buf_start_pos onwards, hence the offset.
            text = self.buffer[
                self.start_pos - self.buf_start_pos:
                self.cur_pos - self.buf_start_pos]
            return (text, action)
        else:
            if self.cur_pos == self.start_pos:
                if self.cur_char is EOL:
                    self.next_char()
                if self.cur_char is None or self.cur_char is EOF:
                    return (u'', None)
            raise Errors.UnrecognizedInput(self, self.state_name)

    def run_machine_inlined(self):
        """
        Inlined version of run_machine for speed.

        Runs the state machine from self.initial_state over the input,
        remembering the most recent state that carried an action.  When no
        further transition is possible, the scanner state is restored to that
        remembered point and its action is returned, or None if no state with
        an action was passed.
        """
        state = self.initial_state
        cur_pos = self.cur_pos
        cur_line = self.cur_line
        cur_line_start = self.cur_line_start
        cur_char = self.cur_char
        input_state = self.input_state
        next_pos = self.next_pos
        buffer = self.buffer
        buf_start_pos = self.buf_start_pos
        buf_len = len(buffer)
        b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
            None, 0, 0, 0, u'', 0, 0

        trace = self.trace
        while 1:
            if trace:
                print("State %d, %d/%d:%s -->" % (
                    state['number'], input_state, cur_pos, repr(cur_char)))

            # Begin inlined self.save_for_backup()
            action = state['action']
            if action is not None:
                b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
                    action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
            # End inlined self.save_for_backup()

            c = cur_char
            new_state = state.get(c, NOT_FOUND)
            if new_state is NOT_FOUND:
                # The 'else' transition is only tried for a truthy current
                # character, so u'' (delivered after EOF) never takes it.
                new_state = c and state.get('else')

            if new_state:
                if trace:
                    print("State %d" % new_state['number'])
                state = new_state
                # Begin inlined: self.next_char()
                if input_state == 1:
                    cur_pos = next_pos
                    # Begin inlined: c = self.read_char()
                    buf_index = next_pos - buf_start_pos
                    if buf_index < buf_len:
                        c = buffer[buf_index]
                        next_pos += 1
                    else:
                        # Buffer exhausted: drop everything before the start
                        # of the current token and append the next chunk.
                        discard = self.start_pos - buf_start_pos
                        data = self.stream.read(0x1000)
                        buffer = self.buffer[discard:] + data
                        self.buffer = buffer
                        buf_start_pos += discard
                        self.buf_start_pos = buf_start_pos
                        buf_len = len(buffer)
                        buf_index -= discard
                        if data:
                            c = buffer[buf_index]
                            next_pos += 1
                        else:
                            c = u''
                    # End inlined: c = self.read_char()
                    if c == u'\n':
                        cur_char = EOL
                        input_state = 2
                    elif not c:
                        cur_char = EOL
                        input_state = 4
                    else:
                        cur_char = c
                elif input_state == 2:  # after EoL (1) -> BoL (3)
                    cur_char = u'\n'
                    input_state = 3
                elif input_state == 3:  # start new code line
                    cur_line += 1
                    cur_line_start = cur_pos = next_pos
                    cur_char = BOL
                    input_state = 1
                elif input_state == 4:  # after final line (1) -> EoF (5)
                    cur_char = EOF
                    input_state = 5
                else:  # input_state == 5  (EoF)
                    cur_char = u''
                # End inlined self.next_char()
            else:  # not new_state
                if trace:
                    print("blocked")
                # Begin inlined: action = self.back_up()
                if b_action is not None:
                    (action, cur_pos, cur_line, cur_line_start,
                     cur_char, input_state, next_pos) = \
                        (b_action, b_cur_pos, b_cur_line, b_cur_line_start,
                         b_cur_char, b_input_state, b_next_pos)
                else:
                    action = None
                break  # while 1
                # End inlined: action = self.back_up()

        # Write the local working copies back to the instance.
        self.cur_pos = cur_pos
        self.cur_line = cur_line
        self.cur_line_start = cur_line_start
        self.cur_char = cur_char
        self.input_state = input_state
        self.next_pos = next_pos
        if trace:
            if action is not None:
                print("Doing %s" % action)
        return action

    def read_char(self):
        """
        Get the next input character, refilling the buffer from the stream
        if necessary, and advance next_pos past it.
        Returns u'' (without advancing) when the stream has no more data.

        This is the non-inlined counterpart of the buffer handling in
        run_machine_inlined(); next_char() relies on it.
        """
        next_pos = self.next_pos
        buf_index = next_pos - self.buf_start_pos
        if buf_index >= len(self.buffer):
            # Buffer exhausted: drop everything before the start of the
            # current token and append the next chunk from the stream.
            discard = self.start_pos - self.buf_start_pos
            data = self.stream.read(0x1000)
            self.buffer = self.buffer[discard:] + data
            self.buf_start_pos += discard
            buf_index -= discard
            if not data:
                return u''
        c = self.buffer[buf_index]
        self.next_pos = next_pos + 1
        return c

    def next_char(self):
        """
        Advance self.cur_char to the next (pseudo-)character according to
        self.input_state: ordinary characters, EOL before each newline,
        BOL after it, and EOL followed by EOF once the input is exhausted.
        """
        input_state = self.input_state
        if self.trace:
            print("Scanner: next: %s [%d] %d" % (" " * 20, input_state, self.cur_pos))
        if input_state == 1:
            self.cur_pos = self.next_pos
            c = self.read_char()
            if c == u'\n':
                self.cur_char = EOL
                self.input_state = 2
            elif not c:
                self.cur_char = EOL
                self.input_state = 4
            else:
                self.cur_char = c
        elif input_state == 2:
            self.cur_char = u'\n'
            self.input_state = 3
        elif input_state == 3:
            self.cur_line += 1
            self.cur_line_start = self.cur_pos = self.next_pos
            self.cur_char = BOL
            self.input_state = 1
        elif input_state == 4:
            self.cur_char = EOF
            self.input_state = 5
        else:  # input_state = 5
            self.cur_char = u''
        if self.trace:
            # Note: this reports the state on entry, not the updated one.
            print("--> [%d] %d %r" % (input_state, self.cur_pos, self.cur_char))

    def position(self):
        """
        Return a tuple (name, line, col) representing the location of
        the last token read using the read() method. |name| is the
        name that was provided to the Scanner constructor; |line|
        is the line number in the stream (1-based); |col| is the
        position within the line of the first character of the token
        (0-based).
        """
        return self.last_token_position_tuple

    def get_position(self):
        """
        Python accessible wrapper around position(), only for error reporting.
        """
        return self.position()

    def begin(self, state_name):
        """Set the current state of the scanner to the named state."""
        self.initial_state = (
            self.lexicon.get_initial_state(state_name))
        self.state_name = state_name

    def produce(self, value, text=None):
        """
        Called from an action procedure, causes |value| to be returned
        as the token value from read(). If |text| is supplied, it is
        returned in place of the scanned text.

        produce() can be called more than once during a single call to an action
        procedure, in which case the tokens are queued up and returned one
        at a time by subsequent calls to read(), until the queue is empty,
        whereupon scanning resumes.
        """
        if text is None:
            text = self.text
        self.queue.append(((value, text), self.current_scanner_position_tuple))

    def eof(self):
        """
        Override this method if you want something to be done at
        end of file.
        """
        pass

    @property
    def start_line(self):
        """Line number of the last token returned by read()."""
        return self.last_token_position_tuple[1]
 |