1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
"Provides a post-lexer for implementing Python-style indentation."
from abc import ABC, abstractmethod
from typing import List, Iterator
from .exceptions import LarkError
from .lark import PostLex
from .lexer import Token
###{standalone
class DedentError(LarkError):
    """Raised when a line dedents to a column that matches no open indentation level."""
class Indenter(PostLex, ABC):
    """Post-lexer that inserts indent/dedent tokens derived from leading whitespace.

    Two pieces of state are maintained while a token stream is processed:
    a stack of the indentation widths currently open, and the nesting depth
    of parentheses. While the parenthesis depth is positive, newline tokens
    are swallowed and no indent/dedent tokens are produced.

    This class is abstract. Subclasses must provide every one of:

    - tab_len
    - NL_type
    - OPEN_PAREN_types, CLOSE_PAREN_types
    - INDENT_type, DEDENT_type

    See also: the ``postlex`` option in `Lark`.
    """

    # Number of currently unclosed opening-parenthesis tokens.
    paren_level: int
    # Stack of open indentation widths; the bottom entry is always 0.
    indent_level: List[int]

    def __init__(self) -> None:
        self.paren_level = 0
        self.indent_level = [0]
        assert self.tab_len > 0

    def handle_NL(self, token: Token) -> Iterator[Token]:
        """Yield the newline token followed by any indent/dedent tokens it implies.

        Nothing at all is yielded (not even ``token``) while inside parentheses.

        Raises:
            DedentError: if the new indentation is smaller than the current one
                but does not equal any enclosing indentation width.
        """
        if self.paren_level > 0:
            return

        yield token

        # Whitespace following the last line break in the token (tabs and spaces).
        trailing_ws = token.rsplit('\n', 1)[1]
        width = trailing_ws.count(' ') + trailing_ws.count('\t') * self.tab_len

        if width > self.indent_level[-1]:
            # Deeper than the current block: open exactly one new level.
            self.indent_level.append(width)
            yield Token.new_borrow_pos(self.INDENT_type, trailing_ws, token)
            return

        # Same depth or shallower: close one level per stack entry that is wider.
        while self.indent_level[-1] > width:
            self.indent_level.pop()
            yield Token.new_borrow_pos(self.DEDENT_type, trailing_ws, token)

        if self.indent_level[-1] != width:
            raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (width, self.indent_level[-1]))

    def _process(self, stream):
        """Generator behind `process`: rewrite ``stream`` and track parenthesis depth."""
        for tok in stream:
            if tok.type != self.NL_type:
                yield tok
            else:
                yield from self.handle_NL(tok)

            # The depth is updated only after the token has been emitted.
            if tok.type in self.OPEN_PAREN_types:
                self.paren_level += 1
            elif tok.type in self.CLOSE_PAREN_types:
                self.paren_level -= 1
                assert self.paren_level >= 0

        # End of input: emit a dedent for every level that is still open.
        while len(self.indent_level) > 1:
            self.indent_level.pop()
            yield Token(self.DEDENT_type, '')

        assert self.indent_level == [0], self.indent_level

    def process(self, stream):
        """Reset the tracked state and return the rewritten token stream."""
        self.paren_level, self.indent_level = 0, [0]
        return self._process(stream)

    # XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
    @property
    def always_accept(self):
        """One-element tuple holding the newline token name."""
        return (self.NL_type,)

    @property
    @abstractmethod
    def NL_type(self) -> str:
        """The name of the newline token."""
        raise NotImplementedError

    @property
    @abstractmethod
    def OPEN_PAREN_types(self) -> List[str]:
        """The names of the tokens that open a parenthesis."""
        raise NotImplementedError

    @property
    @abstractmethod
    def CLOSE_PAREN_types(self) -> List[str]:
        """The names of the tokens that close a parenthesis."""
        raise NotImplementedError

    @property
    @abstractmethod
    def INDENT_type(self) -> str:
        """The name of the token that starts an indentation in the grammar.

        See also: %declare
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def DEDENT_type(self) -> str:
        """The name of the token that ends an indentation in the grammar.

        See also: %declare
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def tab_len(self) -> int:
        """How many spaces one tab character counts as."""
        raise NotImplementedError
class PythonIndenter(Indenter):
    """Indenter configured for Python syntax: injects _INDENT/_DEDENT tokens based on indentation.

    See also: the ``postlex`` option in `Lark`.
    """

    # Token name treated as a newline.
    NL_type = '_NEWLINE'

    # Token names injected when an indented block opens and closes.
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'

    # Token names that open and close a parenthesis.
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']

    # One tab counts as eight spaces.
    tab_len = 8
###}
|