1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
|
# Yapps 2.0 Runtime
#
# This module is needed to run generated parsers.
# from string import *
# import exceptions
import re
class SyntaxError(Exception):
"""When we run into an unexpected token, this is the exception to use"""
def __init__(self, pos=-1, msg="Bad Token"):
Exception.__init__(self)
self.pos = pos
self.msg = msg
def __repr__(self):
if self.pos < 0: return "#<syntax-error>"
else: return "SyntaxError[@ char " + `self.pos` + ": " + self.msg + "]"
__str__ = __repr__
class NoMoreTokens(Exception):
"""Another exception object, for when we run out of tokens"""
pass
class Scanner:
def __init__(self, patterns, ignore, input):
"""Patterns is [(terminal,regex)...]
Ignore is [terminal,...];
Input is a string"""
self.tokens = []
self.restrictions = []
self.input = input
self.pos = 0
self.ignore = ignore
# The stored patterns are a pair (compiled regex,source
# regex). If the patterns variable passed in to the
# constructor is None, we assume that the class already has a
# proper .patterns list constructed
if patterns is not None:
self.patterns = []
for k,r in patterns:
self.patterns.append( (k, re.compile(r)) )
def token(self, i, restrict=0):
"""Get the i'th token, and if i is one past the end, then scan
for another token; restrict is a list of tokens that
are allowed, or 0 for any token."""
if i == len(self.tokens): self.scan(restrict)
if i < len(self.tokens):
# Make sure the restriction is more restricted
if restrict and self.restrictions[i]:
for r in restrict:
if r not in self.restrictions[i]:
raise "Unimplemented: restriction set changed"
return self.tokens[i]
raise NoMoreTokens()
def __repr__(self):
"""Print the last 10 tokens that have been scanned in"""
output = ''
for t in self.tokens[-10:]:
output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],`t[3]`)
return output
def scan(self, restrict):
"""Should scan another token and add it to the list, self.tokens,
and add the restriction to self.restrictions"""
# Keep looking for a token, ignoring any in self.ignore
while 1:
# Search the patterns for the longest match, with earlier
# tokens in the list having preference
best_match = -1
best_pat = '(error)'
for p, regexp in self.patterns:
# First check to see if we're ignoring this token
if restrict and p not in restrict and p not in self.ignore:
continue
m = regexp.match(self.input, self.pos)
if m and len(m.group(0)) > best_match:
# We got a match that's better than the previous one
best_pat = p
best_match = len(m.group(0))
# If we didn't find anything, raise an error
if best_pat == '(error)' and best_match < 0:
msg = "Bad Token"
if restrict:
msg = "Trying to find one of "+join(restrict,", ")
raise SyntaxError(self.pos, msg)
# If we found something that isn't to be ignored, return it
if best_pat not in self.ignore:
# Create a token with this data
token = (self.pos, self.pos+best_match, best_pat,
self.input[self.pos:self.pos+best_match])
self.pos = self.pos + best_match
# Only add this token if it's not in the list
# (to prevent looping)
if not self.tokens or token != self.tokens[-1]:
self.tokens.append(token)
self.restrictions.append(restrict)
return
else:
# This token should be ignored ..
self.pos = self.pos + best_match
class Parser:
def __init__(self, scanner):
self._scanner = scanner
self._pos = 0
def _peek(self, *types):
"""Returns the token type for lookahead; if there are any args
then the list of args is the set of token types to allow"""
tok = self._scanner.token(self._pos, types)
return tok[2]
def _scan(self, type):
"""Returns the matched text, and moves to the next token"""
tok = self._scanner.token(self._pos, [type])
if tok[2] != type:
raise SyntaxError(tok[0], 'Trying to find '+type)
self._pos = 1+self._pos
return tok[3]
def print_error(input, err, scanner):
"""This is a really dumb long function to print error messages nicely."""
p = err.pos
# Figure out the line number
line = count(input[:p], '\n')
print err.msg+" on line "+`line+1`+":"
# Now try printing part of the line
text = input[max(p-80,0):p+80]
p = p - max(p-80,0)
# Strip to the left
i = rfind(text[:p],'\n')
j = rfind(text[:p],'\r')
if i < 0 or (j < i and j >= 0): i = j
if i >= 0 and i < p:
p = p - i - 1
text = text[i+1:]
# Strip to the right
i = find(text,'\n',p)
j = find(text,'\r',p)
if i < 0 or (j < i and j >= 0): i = j
if i >= 0:
text = text[:i]
# Now shorten the text
while len(text) > 70 and p > 60:
# Cut off 10 chars
text = "..." + text[10:]
p = p - 7
# Now print the string, along with an indicator
print '> ',text
print '> ',' '*p + '^'
print 'List of nearby tokens:', scanner
def wrap_error_reporter(parser, rule):
try: return getattr(parser, rule)()
except SyntaxError, s:
input = parser._scanner.input
try:
print_error(input, s, parser._scanner)
except ImportError:
print 'Syntax Error',s.msg,'on line',1+count(input[:s.pos], '\n')
except NoMoreTokens:
print 'Could not complete parsing; stopped around here:'
print parser._scanner
|