File: yappsrt.py

package info (click to toggle)
python-xml 0.8.4-10.1%2Blenny1
links: PTS
area: main
in suites: lenny
size: 4,972 kB
ctags: 10,628
sloc: python: 46,730; ansic: 14,354; xml: 968; makefile: 201; sh: 20
file content (173 lines) | stat: -rw-r--r-- 6,236 bytes
parent folder | download | duplicates (3)
# Yapps 2.0 Runtime
#
# This module is needed to run generated parsers.

# from string import *
# import exceptions
import re

class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use"""
    def __init__(self, pos=-1, msg="Bad Token"):
        Exception.__init__(self)
        self.pos = pos
        self.msg = msg
    def __repr__(self):
        if self.pos < 0: return "#<syntax-error>"
        else: return "SyntaxError[@ char " + `self.pos` + ": " + self.msg + "]"
    __str__ = __repr__

class NoMoreTokens(Exception):
    """Another exception object, for when we run out of tokens"""
    pass

class Scanner:
    def __init__(self, patterns, ignore, input):
        """Patterns is [(terminal,regex)...]
        Ignore is [terminal,...];
        Input is a string"""
        self.tokens = []
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        # The stored patterns are a pair (compiled regex,source
        # regex).  If the patterns variable passed in to the
        # constructor is None, we assume that the class already has a
        # proper .patterns list constructed
        if patterns is not None:
            self.patterns = []
            for k,r in patterns:
                self.patterns.append( (k, re.compile(r)) )

    def token(self, i, restrict=0):
        """Get the i'th token, and if i is one past the end, then scan
        for another token; restrict is a list of tokens that
        are allowed, or 0 for any token."""
        if i == len(self.tokens): self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the restriction is more restricted
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        raise "Unimplemented: restriction set changed"
            return self.tokens[i]
        raise NoMoreTokens()

    def __repr__(self):
        """Print the last 10 tokens that have been scanned in"""
        output = ''
        for t in self.tokens[-10:]:
            output = '%s\n  (@%s)  %s  =  %s' % (output,t[0],t[2],`t[3]`)
        return output

    def scan(self, restrict):
        """Should scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions"""
        # Keep looking for a token, ignoring any in self.ignore
        while 1:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = len(m.group(0))

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = "Bad Token"
                if restrict:
                    msg = "Trying to find one of "+join(restrict,", ")
                raise SyntaxError(self.pos, msg)

            # If we found something that isn't to be ignored, return it
            if best_pat not in self.ignore:
                # Create a token with this data
                token = (self.pos, self.pos+best_match, best_pat,
                         self.input[self.pos:self.pos+best_match])
                self.pos = self.pos + best_match
                # Only add this token if it's not in the list
                # (to prevent looping)
                if not self.tokens or token != self.tokens[-1]:
                    self.tokens.append(token)
                    self.restrictions.append(restrict)
                return
            else:
                # This token should be ignored ..
                self.pos = self.pos + best_match

class Parser:
    def __init__(self, scanner):
        self._scanner = scanner
        self._pos = 0

    def _peek(self, *types):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        tok = self._scanner.token(self._pos, types)
        return tok[2]

    def _scan(self, type):
        """Returns the matched text, and moves to the next token"""
        tok = self._scanner.token(self._pos, [type])
        if tok[2] != type:
            raise SyntaxError(tok[0], 'Trying to find '+type)
        self._pos = 1+self._pos
        return tok[3]



def print_error(input, err, scanner):
    """This is a really dumb long function to print error messages nicely."""
    p = err.pos
    # Figure out the line number
    line = count(input[:p], '\n')
    print err.msg+" on line "+`line+1`+":"
    # Now try printing part of the line
    text = input[max(p-80,0):p+80]
    p = p - max(p-80,0)

    # Strip to the left
    i = rfind(text[:p],'\n')
    j = rfind(text[:p],'\r')
    if i < 0 or (j < i and j >= 0): i = j
    if i >= 0 and i < p:
        p = p - i - 1
        text = text[i+1:]

    # Strip to the right
    i = find(text,'\n',p)
    j = find(text,'\r',p)
    if i < 0 or (j < i and j >= 0): i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator
    print '> ',text
    print '> ',' '*p + '^'
    print 'List of nearby tokens:', scanner

def wrap_error_reporter(parser, rule):
    try: return getattr(parser, rule)()
    except SyntaxError, s:
        input = parser._scanner.input
        try:
            print_error(input, s, parser._scanner)
        except ImportError:
            print 'Syntax Error',s.msg,'on line',1+count(input[:s.pos], '\n')
    except NoMoreTokens:
        print 'Could not complete parsing; stopped around here:'
        print parser._scanner