File: async_lexer.py

Package: python-ptk 1.3.8+dfsg-1 (Debian bullseye)

# -*- coding: UTF-8 -*-

# (c) Jérôme Laheurte 2015-2019
# See LICENSE.txt

# XXXTODO: when pylint supports async, remove this...
# pylint: skip-file

from ptk.lexer import ProgressiveLexer, token, EOF, LexerError
from ptk.regex import DeadState
from ptk.utils import chars

try:
    from async_generator import aclosing, async_generator, yield_, yield_from_
except ImportError:
    raise RuntimeError('You need to have the async_generator package installed to use the async lexer.')


class AsyncLexer(ProgressiveLexer):
    """

    This class works like :py:class:`ProgressiveLexer` but can be fed
    its input asynchronously via :py:func:`asyncFeed`. It works with
    :py:class:`AsyncLRParser`.

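    A minimal usage sketch (``MyLexer``, its ``number`` token and the
    ``tokens`` list are illustrative, not part of this module)::

        class MyLexer(AsyncLexer):
            def __init__(self):
                super().__init__()
                self.tokens = []

            @token(r'[0-9]+')
            def number(self, tok):
                tok.value = int(tok.value)

            async def asyncNewToken(self, tok):
                # Collect tokens; a parser would consume them instead.
                if tok is not EOF:
                    self.tokens.append(tok)

        # lexer = MyLexer(); await lexer.asyncParse('42')
        # lexer.tokens now holds the recognized tokens.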
    """

    async def asyncParse(self, text):
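        """
        Feeds each character of *text* through :py:func:`asyncFeed`,
        then feeds ``EOF`` and returns the resulting value.
        """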
        for i, char in enumerate(text):
            await self.asyncFeed(char, i+1)
        return await self.asyncFeed(EOF)

    async def asyncFeed(self, char, charPos=None):
        """
        Asynchronous version of :py:func:`ProgressiveLexer.feed`. This
        awaits on the :py:func:`asyncNewToken` method instead of
        calling 'newToken' synchronously.
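
        A minimal driving sketch, mirroring what :py:func:`asyncParse`
        does (``lexer`` is assumed to be an instance of a concrete
        subclass)::

            for pos, char in enumerate(text):
                await lexer.asyncFeed(char, pos + 1)
            result = await lexer.asyncFeed(EOF)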
        """
        self._input.append((char, charPos))
        while self._input:
            char, charPos = self._input.pop(0)
            async with aclosing(self._asyncFeed(char, charPos)) as agen:
                async for tok in agen:
                    value = await self.asyncNewToken(tok)
                    if value is not None:
                        return value

    @async_generator
    async def asyncIterFeed(self, char, charPos=None):
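        """
        Like :py:func:`asyncFeed`, but yields every non-None value
        returned by :py:func:`asyncNewToken` instead of returning the
        first one.
        """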
        self._input.append((char, charPos))
        while self._input:
            char, charPos = self._input.pop(0)
            async with aclosing(self._asyncFeed(char, charPos)) as agen:
                async for tok in agen:
                    value = await self.asyncNewToken(tok)
                    if value is not None:
                        await yield_(value)

    @async_generator
    async def asyncIterParse(self, text):
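        """
        Feeds each character of *text* through :py:func:`asyncIterFeed`
        and yields the resulting values.
        """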
        for char in text:
            async with aclosing(self.asyncIterFeed(char)) as agen:
                await yield_from_(agen)
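
    # A hedged consumption sketch (``lexer`` is assumed to be an instance
    # of a concrete AsyncLexer subclass):
    #
    #     async with aclosing(lexer.asyncIterParse('42')) as agen:
    #         async for value in agen:
    #             print(value)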

    async def asyncNewToken(self, tok):
        """
        Asynchronous version of :py:func:`LexerBase.newToken`; called
        with each recognized token and finally with ``EOF``.
        """
        raise NotImplementedError

    @async_generator
    async def _asyncFeed(self, char, charPos): # pylint: disable=R0912,R0915
        # Unfortunately this is copy/pasted from ProgressiveLexer._feed to add the async stuff...
        if char in chars('\n'):
            self.advanceLine()
        else:
            self.advanceColumn()

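        # If a consumer has been installed via setConsumer() (typically
        # by a token callback), it takes over character handling until
        # its feed() returns a (type, value) pair; a None type means the
        # match is discarded.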
        if self.consumer() is not None:
            tok = await self.consumer().feed(char)
            if tok is not None:
                self.setConsumer(None)
                if tok[0] is not None:
                    await yield_(self.Token(*tok, self.position()))
            return

        try:
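            # On EOF, flush pending matches: for each still-alive regex,
            # pos[0] holds the length of the longest prefix it has
            # accepted so far.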
            if char is EOF:
                if self._state == 0:
                    self.restartLexer()
                    await yield_(EOF)
                    return
                self._maxPos = max(self._maxPos, max(pos[0] for regex, callback, defaultType, pos in self._currentState))
                if self._maxPos == 0 and self._currentMatch:
                    raise LexerError(self._currentMatch[0][0], *self._currentMatch[0][1])
                self._matches.extend([(pos[0], callback) for regex, callback, defaultType, pos in self._currentState if pos[0] == self._maxPos])
                self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos]
            else:
                if self._state == 0 and self.ignore(char):
                    return
                self._state = 1

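                # Advance each live regex by one character. A regex that
                # dies (DeadState) yields a match if it had previously
                # accepted a prefix (pos[0] non-zero).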
                newState = list()
                for regex, callback, defaultType, pos in self._currentState:
                    try:
                        if regex.feed(char):
                            pos[0] = len(self._currentMatch) + 1
                    except DeadState:
                        if pos[0]:
                            self._matches.append((pos[0], callback))
                            self._maxPos = max(self._maxPos, pos[0])
                    else:
                        newState.append((regex, callback, defaultType, pos))

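                # If every surviving regex is a dead end (cannot consume
                # any more input), emit them all as matches now.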
                if all(regex.isDeadEnd() for regex, callback, defaultType, pos in newState):
                    for regex, callback, defaultType, pos in newState:
                        self._matches.append((len(self._currentMatch) + 1, callback))
                        self._maxPos = max(self._maxPos, len(self._currentMatch) + 1)
                    newState = list()

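                # Longest-match rule: keep only matches of maximal length.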
                self._matches = [(pos, callback) for pos, callback in self._matches if pos == self._maxPos]
                self._currentState = newState

                self._currentMatch.append((char, self.position() if charPos is None else charPos))
                if self._currentState:
                    return

                if self._maxPos == 0:
                    raise LexerError(char, *self.position())
        except LexerError:
            self.restartLexer()
            raise

        tok = self._finalizeMatch()
        if tok is not None:
            await yield_(tok)

        if char is EOF:
            self.restartLexer()
            await yield_(EOF)