"""
Block-level tokenizer for mistletoe.
"""


class FileWrapper:
    """
    A wrapper around a list of lines that tracks the current reading
    position and allows it to be saved and restored.
    """
    def __init__(self, lines, start_line=1):
        self.lines = lines if isinstance(lines, list) else list(lines)
        self.start_line = start_line
        self._index = -1
        self._anchor = 0

    def __next__(self):
        if self._index + 1 < len(self.lines):
            self._index += 1
            return self.lines[self._index]
        raise StopIteration

    def __iter__(self):
        return self

    def __repr__(self):
        return repr(self.lines[self._index + 1:])

    def get_pos(self):
        """Returns the current reading position.

        The result is an opaque value which can be passed to `set_pos`.
        """
        return self._index

    def set_pos(self, pos):
        """Sets the current reading position."""
        self._index = pos

    def anchor(self):
        """@deprecated use `get_pos` instead"""
        self._anchor = self.get_pos()

    def reset(self):
        """@deprecated use `set_pos` instead"""
        self.set_pos(self._anchor)

    def peek(self):
        """Returns the next line without consuming it, or None at EOF."""
        if self._index + 1 < len(self.lines):
            return self.lines[self._index + 1]
        return None

    def backstep(self):
        """Moves the reading position back by one line, if possible."""
        if self._index != -1:
            self._index -= 1

    def line_number(self):
        """Returns the line number of the last line read."""
        return self.start_line + self._index
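

# A minimal sketch of how FileWrapper is typically driven. The input
# lines below are hypothetical, for illustration only:
#
#     >>> lines = FileWrapper(["first\n", "second\n"], start_line=10)
#     >>> lines.peek()         # look ahead without consuming
#     'first\n'
#     >>> pos = lines.get_pos()
#     >>> next(lines)
#     'first\n'
#     >>> lines.line_number()  # start_line + index of last consumed line
#     10
#     >>> lines.set_pos(pos)   # rewind: 'first\n' will be read again
#     >>> next(lines)
#     'first\n'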


def tokenize(iterable, token_types):
    """
    Searches for token_types in iterable.

    Args:
        iterable (list): user input lines to be parsed.
        token_types (list): a list of block-level token constructors.

    Returns:
        a list of block-level token instances.
    """
    return make_tokens(tokenize_block(iterable, token_types))
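

# A minimal sketch of the interface tokenize() expects from each entry in
# token_types. ToyRule below is hypothetical, for illustration only; the
# real token types live elsewhere in mistletoe:
#
#     class ToyRule:
#         @staticmethod
#         def start(line):
#             # called on a peeked line to test whether this rule matches
#             return line.startswith("---")
#
#         @staticmethod
#         def read(lines):
#             # receives the FileWrapper; consumes the matched line(s)
#             return next(lines)
#
#         def __init__(self, result):
#             # constructed later by make_tokens() from the read result
#             self.content = result
#
#     tokens = tokenize(["---\n", "plain text\n"], [ToyRule])
#     # tokens[0].line_number == 1; "plain text\n" matches no rule and is
#     # skipped by the else branch in tokenize_block().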


def tokenize_block(iterable, token_types, start_line=1):
    """
    Returns a list of tuples (token_type, read_result, line_number).

    Footnotes are parsed here, but span-level parsing has not
    started yet.
    """
    lines = FileWrapper(iterable, start_line=start_line)
    parse_buffer = ParseBuffer()
    line = lines.peek()
    while line is not None:
        for token_type in token_types:
            if token_type.start(line):
                # +1 because the matched line has only been peeked at,
                # not yet consumed.
                line_number = lines.line_number() + 1
                result = token_type.read(lines)
                if result is not None:
                    parse_buffer.append((token_type, result, line_number))
                break
        else:  # unmatched newlines
            next(lines)
            parse_buffer.loose = True
        line = lines.peek()
    return parse_buffer


def make_tokens(parse_buffer):
    """
    Takes a list of tuples (token_type, read_result, line_number),
    applies token_type(read_result), and sets the line_number attribute.

    Footnotes are already parsed before this point,
    and span-level parsing is started here.
    """
    tokens = []
    for token_type, result, line_number in parse_buffer:
        token = token_type(result)
        if token is not None:
            token.line_number = line_number
            tokens.append(token)
    return tokens
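

# Hypothetical two-phase walkthrough, reusing ToyRule from the sketch
# above: tokenize_block() collects (token_type, result, line_number)
# tuples, then make_tokens() instantiates them:
#
#     buffer = tokenize_block(["---\n"], [ToyRule])
#     # buffer == [(ToyRule, "---\n", 1)]; buffer.loose is False
#     tokens = make_tokens(buffer)
#     # tokens[0].content == "---\n"; tokens[0].line_number == 1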


class ParseBuffer(list):
    """
    A wrapper around the builtin list, so that instances
    can carry a `loose` attribute.
    """
    def __init__(self, *args):
        super().__init__(*args)
        self.loose = False
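

# Why the subclass is needed: arbitrary attributes cannot be set on a
# builtin list instance, so a plain list could not carry the `loose` flag:
#
#     >>> plain = []
#     >>> plain.loose = True    # raises AttributeError
#     >>> ParseBuffer().loose
#     False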