# A C++ Preprocessor                     -*- coding: iso-8859-1 -*-
# Copyright 2002 Martin v. Löwis
#
# This is an implementation of ISO/IEC 14882:1998,
# sections 2.1/{1-6} [lex.phases], and 16 [cpp]
# It deviates from this standard in the following aspects:
# - Universal character names (2.2/2) are not supported
# - Computations in #if directives are performed in the Python integer type
# TODO:
# - predefined macro names (16.8)
# - add file/line information into SyntaxError exceptions
# - implement #line directives
# - implement #error directives

"""A C++ Preprocessor
This module has three modes of operations:
1. As a script, it implements the cpp(1) executable, with support
   for -I and -D command line options. This has the same effect
   as invoking cpp.run().
2. With cpp.process_command_line a Preprocessor object can be created
   for a command line.
3. Creating a cpp.Preprocessor object directly allows to specify
   include directories and the input file from other sources.
A Preprocessor object then primarily supports the get_token method,
which returns cpp.Token objects."""
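
# Example of library use (a sketch; "example.cc" stands for any input file):
#
#   import cpp
#   pp = cpp.Preprocessor("example.cc", incdirs=['.'])
#   while 1:
#       t = pp.get_token()
#       if t is None:
#           break
#       print t.type, t.text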

import re, os

trigraphs = {
    '??=': '#',
    '??(': '[',
    '??<': '{',
    '??/': '\\',
    '??)': ']',
    '??>': '}',
    "??'": '^',
    '??!': '|',
    '??-': '~'
    }
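
# Example: a physical source line "??=include <stdio.h>" is read as
# "#include <stdio.h>" after trigraph replacement.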

whitespace = " \t\f"

# token classes
IDENTIFIER = 'identifier'
HEADERNAME = 'headername'
NUMBER = 'number'
CHARLITERAL = 'charliteral'
STRINGLITERAL = 'stringliteral'
PREPROCESSING_OP_OR_PUNC = 'preprocessing-op-or-punc'
PRAGMA = 'pragma'
INCLUDE_START = 'include_start'
INCLUDE_END = 'include_end'
OTHER = 'other'

# [lex.operators], except for identifier-like punctuators
preprocessing_op_or_punc = [
    '{', '}', '[', ']', '#', '##', '(', ')',
    '<:', ':>', '<%', '%>', '%:', '%:%:', ';', ':',
    '...', '?', '::', '.', '.*', '+', '-', '*', '/',
    '%', '^', '&', '|', '~', '!', '=', '<', '>', '+=',
    '-=', '*=', '/=', '%=', '^=', '&=', '|=', '<<', '>>',
    '>>=', '<<=', '==', '!=', '<=', '>=', '&&', '||', '++',
    '--', ',', '->*', '->'
   ]

# [lex.digraph]
alternative_tokens = {
    '<%':'{',
    '%>':'}',
    '<:':'[',
    ':>':']',
    '%:':'#',
    '%:%:':'##',
    'and'   : '&&',
    'bitor' : '|',
    'or'    : '||',
    'xor'   : '^',
    'compl' : '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq' : '|=',
    'xor_eq': '^=',
    'not'   : '!',
    'not_eq': '!='
    }
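
# Example: with digraphs enabled, the directive "%:define begin <%" is
# treated like "#define begin {".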

# Lookup tables for longest-match punctuator scanning: punctuators[0]
# holds the four-character tokens, punctuators[3] the one-character ones.
punctuators = (({},4),({},3),({},2),({},1))
for p in preprocessing_op_or_punc:
    punctuators[4-len(p)][0][p] = 1

# [lex.pptoken], except for preprocessing_op_or_punc and "other whitespace"
# XXX for some reason, headername never matches
pp_tokens = re.compile(r"""
  (?P<headername>(<[^>]+>|"[^"]+"))|
  (?P<identifier>[_a-zA-Z][_a-zA-Z0-9]*)|
  (?P<number>\.?[0-9]([_a-df-zA-DF-Z0-9.]|[eE][-+]?)*)|
  (?P<charliteral>L?'([^\\']|\\['"?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+)*')|
  (?P<stringliteral>L?"([^\\"]|\\['"?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+)*")
""", re.VERBOSE)

#' to make emacs mode happy

def find_path(path, file):
    for p in path:
        fn = os.path.join(p, file)
        if os.path.exists(fn):
            return fn
    return None

class SyntaxError(Exception):
    pass

class Token:
    """Class representing a preprocessor token. The following attributes
    are available:
      type - indicates the token class, one of IDENTIFIER,
             HEADERNAME, CHARLITERAL, STRINGLITERAL,
             PREPROCESSING_OP_OR_PUNC, PRAGMA,
             INCLUDE_START, INCLUDE_END, OTHER
      text - indicates the token text
      file, line - source code reference
      bol    - true if token was at the beginning of a line
      space_follows - true if the token had immediate subsequent white space
      replacable - true if the token is available for further macro replacement
      """
              
    def __init__(self, type, text, file, line):
        self.type = type
        self.text = text
        self.file = file
        self.line = line
        self.bol = 0
        self.space_follows = 0
        self.replacable = 1

def copy_tokens(tokens):
    result = []
    for t in tokens:
        t1 = Token(t.type, t.text, t.file, t.line)
        t1.bol = t.bol
        t1.space_follows = t.space_follows
        t1.replacable = t.replacable
        result.append(t1)
    return result

class InputFile:
    """Class representing an preprocessor source file."""
    
    def __init__(self, name, path):
        """Construct the file, given the file name (as it appeared
        in the include directive or on the command line), and its
        path."""
        
        self.name = name
        self.path = path
        self.line = 0
        file = open(path, "rb")
        data = file.read()
        file.close()
        self.pos = 0
        self.include = None
        self.comment = 0
        self.ifstack = []
        self.bol = 1
        self.backlog = []
        self.included = 0

        # Replace trigraphs, [lex.trigraph], [lex.phases]/1
        pos = data.find("??")
        while pos != -1:
            tg = data[pos:pos+3]
            if trigraphs.has_key(tg):
                data = data[:pos] + trigraphs[tg] + data[pos+3:]
                pos = data.find("??", pos)
            else:
                # not a trigraph, e.g. "??x"; leave it alone
                pos = data.find("??", pos+1)

        # Universal Character Names, [lex.charset]/2
        for esc, hlen in (('\\u', 4), ('\\U', 8)):
            pos = data.find(esc)
            if pos!=-1:
                for i in range(pos+2, pos+2+hlen):
                    if data[i] not in '0123456789ABCDEFabcdef':
                        break
                else:
                    raise NotImplementedError, "universal character names"

        # Split input into physical source lines
        if data.find("\n")!=-1:
            data = data.split("\n")
            # strip any \r originating from \r\n
            for i in range(len(data)):
                if data[i] and data[i][-1] == '\r':
                    data[i] = data[i][:-1]
        else:
            data = data.split("\r")

        # Add line numbers
        for i in range(len(data)):
            data[i] = i+1, data[i]

        # Form logical source lines, [lex.phases]/2
        # XXX may want to preserve the original line break positions
        i = 0
        while i < len(data):
            if data[i][1] and data[i][1][-1] == '\\' and i+1 < len(data):
                data[i] = data[i][0], data[i][1][:-1]+data[i+1][1]
                del data[i+1]
            else:
                i += 1

        self.lines = data
        self.curline = data[0][1]
        self.lineno = 1

    def unget_token(self, t):
        """Put a token into the backlog."""
        
        self.backlog.insert(0, t)

    def skip_whitespace(self):
        """Advance until the file is consumed, or a non-whitespace
        token is encountered."""
        
        while self.pos < len(self.curline):
            if self.curline[self.pos] in whitespace:
                self.pos += 1
                continue
            next = self.curline[self.pos:self.pos+2]
            if next == '//':
                # C++ comment
                self.pos = len(self.curline)
                return
            if next == '/*':
                # C comment
                pos = self.curline.find('*/', self.pos+2)
                if pos != -1:
                    # closed within the same line
                    self.pos = pos + 2
                else:
                    # not closed in the same line
                    self.pos += 2
                    self.comment = 1
            else:
                return

    def get_token(self):
        """Return a token from the file."""
        
        if self.backlog:
            result = self.backlog[0]
            del self.backlog[0]
            return result
        # skip empty lines, open comments
        self.skip_whitespace()
        while self.pos >= len(self.curline) or self.comment:
            self.pos = 0
            self.line += 1
            if self.line >= len(self.lines):
                # End of file
                if self.comment:
                    raise SyntaxError, "incomplete comment"
                return None
            self.lineno = self.lines[self.line][0]
            self.curline = self.lines[self.line][1]
            self.bol = 1
            if self.comment:
                pos = self.curline.find("*/")
                if pos != -1:
                    self.pos = pos + 2
                    self.comment = 0
            # skip whitespace at the beginning of the new line
            self.skip_whitespace()
        self.skip_whitespace()

        bol = self.bol
        self.bol = 0

        # match token classes
        m = pp_tokens.match(self.curline, self.pos)
        if m:
            text = m.group()
            self.pos += len(text)
            kind = m.lastgroup
            # XXX: new, delete should be preprocessing_op_or_punc???
        else:
            for d, l in punctuators:
                text = self.curline[self.pos:self.pos+l]
                if d.has_key(text):
                    self.pos += l
                    kind = PREPROCESSING_OP_OR_PUNC
                    break
            else:
                text = self.curline[self.pos]
                self.pos += 1
                kind = OTHER

        result = Token(kind, text, self.name, self.lineno)
        result.bol = bol
        try:
            result.space_follows = self.curline[self.pos] in whitespace
        except IndexError:
            pass
        return result

class TokenList:
    """Class representing a stand-alone list of tokens, such as
    a macro replacement list."""
    
    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0

    def get_token(self):
        """Get a token from the list."""
        
        pos = self.pos
        self.pos = pos + 1
        try:
            return self.tokens[pos]
        except IndexError:
            return None

    def unget_token(self, t):
        """Put a token back into the list. This currently assumes that
        this very token has been taken from the list immediately before."""
        
        assert self.tokens[self.pos - 1] is t
        self.pos -= 1

    def unget_tokens(self, tokens):
        """Put a list of tokens into this list at the current position."""
        
        self.tokens[self.pos:self.pos] = tokens

################## The preprocessor proper #############################

class Preprocessor:
    """A C++ preprocessor. The major method is get_token."""
    
    def __init__(self, topfile, incdirs = ['.'], sys_incdirs=[],
                 digraphs = 1):
        """Initialize a preprocessor for a source file. Optional
        arguments are:
        inc_dirs:      list of include directories
        sys_incdirs:   list of include directories for system headers
        digraphs:      true if the preprocessor should replace digraphs."""
        
        self.defines = {}
        self.incdirs = incdirs
        self.sys_incdirs = sys_incdirs
        self.files = [InputFile(topfile, topfile)]
        self.backlog = []
        self.expanded_macros = []
        self.pending_pop = 0
        self.digraphs = digraphs

    def _pop_file(self):
        """Internal. Remove a file from the stack."""

        f = self.files.pop()
        if f.ifstack:
            raise SyntaxError, "unterminated conditional"

        if f.included:
            self._unget_file_token(Token(INCLUDE_END, f.path,
                                         None, None))

    def _get_file_token(self, pop = 1):
        """Internal. Get a token from the innermost file."""

        if self.pending_pop:
            self._pop_file()
            self.pending_pop = 0
        while 1:
            if len(self.files) == 0:
                return None
            t = self.files[-1].get_token()
            if t is None:
                if pop:
                    self._pop_file()
                    return self._get_file_token()
                else:
                    self.pending_pop = 1
                return None
            elif self.digraphs and \
                     t.type in [IDENTIFIER, PREPROCESSING_OP_OR_PUNC]:
                try:
                    t1 = alternative_tokens[t.text]
                except KeyError:
                    return t
                else:
                    (t,) = copy_tokens([t])
                    t.type = PREPROCESSING_OP_OR_PUNC
                    t.text = t1
                    return t
            else:
                return t

    def _unget_file_token(self, t):
        """Internal. Put back a token to the innermost file."""
        
        self.files[-1].unget_token(t)

    def skipped_section(self):
        """Return true if we are in a skipped section."""
        
        stack = self.files and self.files[-1].ifstack
        if stack:
            return stack[-1] <= 0
        return 0

    def get_token(self):
        """Return the next token, or None at the end of the input."""
        
        if self.backlog:
            t = self.backlog[0]
            del self.backlog[0]
            return t

        token = self._get_file_token()
        if token is None:
            return token

        while self.skipped_section():
            if token.bol and token.text in ['#', '%:'] :
                self.preprocess()
            token = self._get_file_token()
            if token is None:
                return None

        if token.bol and token.text in ['#', '%:']:
            self.preprocess()
            return self.get_token()

        if token.type == IDENTIFIER and token.replacable:
            try:
                macro = self.defines[token.text]
            except KeyError:
                pass
            else:
                body = self.expand_macro(token, self)
                if not body:
                    # the macro expanded to nothing; deliver the next token
                    return self.get_token()
                token = body[0]
                self.backlog[0:0] = body[1:]

        return token

    def unget_token(self, t):
        """Put back a token to the preprocessor."""
        
        self._unget_file_token(t)

    def preprocess(self):
        """Implement # directives."""
        
        tokens = []
        while 1:
            # If we reach EOF, we shall not pop to the previous file,
            # since a preprocessor instruction must not span files
            t = self._get_file_token(pop = 0)
            if t is None:
                break
            if t.bol:
                self._unget_file_token(t)
                break
            else:
                tokens.append(t)

        if not tokens:
            # empty preprocessing directive
            return

        t = tokens[0]
        del tokens[0]

        if self.skipped_section() and \
               t.text not in ['if', 'ifdef', 'ifndef', 'elif', 'else', 'endif']:
            return

        getattr(self, 'do_'+t.text)(tokens)
        return

    def do_line(self, tokens):
        """Implement #line directives."""
        # Line directive, ignore for now
        return

    def do_pragma(self, tokens):
        """Implement pragma directives, by creating
        a new token for the pragma."""
        
        # Join fragments to a single text
        text = []
        for t in tokens:
            text.append(t.text)
        text = " ".join(text)

        # Create a new token
        t = Token(PRAGMA, text, tokens[0].file, tokens[0].line)
        # preserve original tokens
        t.pragma = tokens
        self._unget_file_token(t)

    def do_include(self, tokens):
        """Process an include directive. On return, a new file is on
        the stack."""
        
        # [cpp.include]
        # parse the directive
        if len(tokens) == 1:
            # [cpp.include]/2,3
            if tokens[0].type == HEADERNAME:
                filename = tokens[0].text
            elif tokens[0].type == STRINGLITERAL and tokens[0].text[0]!='L':
                filename = tokens[0].text
            else:
                raise SyntaxError, "Invalid include directive "+tokens[0].text
        else:
            # [cpp.include]/4
            tokens = self.expand_macros(tokens)
            if len(tokens) < 3:
                raise SyntaxError, "invalid include directive"
            ftext = tokens[0].text
            ltext = tokens[-1].text
            if ftext == '<' and ltext=='>' \
                   or ftext == ltext == '"':
                texts = []
                for t in tokens:
                    texts.append(t.text)
                filename = "".join(texts)
            else:
                raise SyntaxError, "invalid include directive"

        # find the file
        if filename[0] == '<':
            # [cpp.include]/2
            file = find_path(self.incdirs, filename[1:-1])
            if not file:
                file = find_path(self.sys_incdirs, filename[1:-1])
        else:
            # [cpp.include]/3
            file = find_path(self.incdirs, filename[1:-1])
        if not file:
            raise SyntaxError, "include file "+filename+" not found"

        # open the file
        f = InputFile(filename[1:-1], file)
        f.included = 1
        self.files.append(f)

        # Add the INCLUDE_START token
        self._unget_file_token(Token(INCLUDE_START, file, # XXX filename?
                                     tokens[0].file, tokens[0].line))

    def do_define(self, tokens):
        """Process a #define."""
        
        macro = tokens[0]
        if macro.type != IDENTIFIER:
            raise SyntaxError, "invalid define directive"

        function_like = 0
        body_start = 1
        args = []
        if not macro.space_follows and len(tokens)>1 and tokens[1].text == '(':
            function_like = 1
            body_start += 1
            try:
                if tokens[body_start].text != ')':
                    while 1:
                        arg = tokens[body_start]
                        if arg.type != IDENTIFIER:
                            raise SyntaxError, "Error in argument list"
                        # [cpp.replace]/4
                        for a in args:
                            if a == arg.text:
                                raise SyntaxError, "Duplicate macro argument"
                        
                        args.append(arg.text)

                        if tokens[body_start+1].text == ')':
                            body_start += 2
                            break
                        
                        if tokens[body_start+1].text != ',':
                            raise SyntaxError, "Error in argument list"
                        body_start += 2
                else:
                    # empty parameter list; skip over the closing )
                    body_start += 1
            except IndexError:
                raise SyntaxError, "missing ) in function-like macro"

        body = tokens[body_start:]

        old_macro = self.defines.get(macro.text)
        if old_macro:
            self.compare_macros(macro.text, old_macro,
                                (function_like, args, body))
        
        self.defines[macro.text] = (function_like, args, body)
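
    # Minimal helper used by process_command_line for -D options. This is
    # a sketch: the replacement text is tokenized naively as a single
    # identifier or number, which only covers simple command line macros.
    def add_define(self, name, value):
        """Define an object-like macro from the command line. value is
        a list holding the replacement text (possibly empty)."""

        body = []
        if value:
            text = value[0]
            if re.match(r"[_a-zA-Z][_a-zA-Z0-9]*$", text):
                kind = IDENTIFIER
            else:
                kind = NUMBER
            body = [Token(kind, text, "<command line>", 0)]
        self.defines[name] = (0, [], body)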

    def compare_macros(self, name, (oldf, olda, oldb), (newf, newa, newb)):
        """Return if two macros definitions are equivalent. Otherwise,
        raise a SyntaxError."""
        
        # [cpp.replace]/{1-3}
        if oldf != newf or len(olda) != len(newa) or len(oldb) != len(newb):
            raise SyntaxError, "Invalid redefinition of "+name
        # [cpp.replace]/1
        for i in range(len(oldb)):
            if oldb[i].text != newb[i].text:
                raise SyntaxError, "Invalid redefinition of "+name
        # [cpp.replace]/3
        for i in range(len(olda)):
            if olda[i].text != newa[i].text:
                raise SyntaxError, "Invalid redefinition of "+name            

    def expand_macro(self, macro, token_list):
        """Return the macro replacement list for macro. This may consume
        further tokens, if the macro is function-like."""
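        # For illustration: with "#define MIN(a,b) ((a)<(b)?(a):(b))",
        # expanding the token MIN followed by "( x , y )" from token_list
        # yields the token sequence "( ( x ) < ( y ) ? ( x ) : ( y ) )".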
        
        if macro.text in self.expanded_macros:
            macro.replacable = 0
            return [macro]
        self.expanded_macros.append(macro.text)
        function_like, params, body = self.defines[macro.text]
        if function_like:
            # look for the opening (
            t = token_list.get_token()
            if t is None or t.text != '(':
                # not a macro invocation after all
                self.expanded_macros.pop()
                if t is None:
                    return [macro]
                return [macro, t]
            arguments = self.get_arguments(token_list)
            if len(arguments) != len(params):
                raise SyntaxError, "Incorrect number of macro arguments"
            body = self.replace_arguments_in_body(params, arguments, body)
            body = self.replace_hash_param(params, arguments, body)
        else:
            arguments = []
            # work on a copy, so that the stored definition is not modified
            body = copy_tokens(body)

        body = self.replace_hashhash_body(params, arguments, body)
        self.expanded_macros.pop()
        # adjust line numbers to line of macro application
        for t in body:
            t.file = macro.file
            t.line = macro.line
        return body
    
    def get_arguments(self, token_list):
        """Return the arguments for a macro from token_list."""
        
        arguments = []
        arg = []
        parens = 1
        while 1:
            t = token_list.get_token()
            if t is None:
                raise SyntaxError, "Missing )"
            if t.text == '(':
                arg.append(t)
                parens += 1
            elif t.text == ')':
                if parens == 1:
                    if arg:
                        arguments.append(arg)
                    break
                arg.append(t)
                parens -= 1
            elif t.text == ',' and parens == 1:
                arguments.append(arg)
                arg = []
            else:
                arg.append(t)
        return arguments
            
    def replace_arguments_in_body(self, params, arguments, body):
        """Find all occurrences of arguments in the macro body, and
        return an new token list with the arguments replaced."""
        
        final_body = []
        for i in range(len(body)):
            t = body[i]
            if t.type != IDENTIFIER:
                final_body.append(t)
                continue
            try:
                pos = params.index(t.text)
            except ValueError:
                final_body.append(t)
                continue
            # Don't replace parameters preceded or followed
            # by ## or preceded by #
            if i > 0 and body[i-1].text in ['#', '##', '%:', '%:%:']:
                final_body.append(t)
                continue
            if i < len(body)-1 and body[i+1].text in ['##', '%:%:']:
                final_body.append(t)
                continue

            new_tokens = self.expand_macros(arguments[pos])
            final_body.extend(copy_tokens(new_tokens))
        return final_body

    def replace_hash_param(self, params, arguments, body):
        """Return a macro body with all occurrences
        of the # stringification operator replaced."""
        
        # Replace # param with the spelling of the argument
        final_body = []
        i = 0
        while i < len(body):
            t = body[i]
            if t.text not in ['#', '%:']:
                final_body.append(t)
                i += 1
                continue
            pos = params.index(body[i+1].text)
            arg = [a.text for a in arguments[pos]]
            arg = ' '.join(arg)
            # XXX: stringifying strings?
            assert '"' not in arg
            arg = Token(STRINGLITERAL, '"' + arg + '"', None, None)
            final_body.append(arg)
            # skip the parameter name that follows the # operator
            i += 2
        return final_body

    def replace_hashhash_body(self, params, arguments, body):
        """Return a token list with all occurrences of the
        ## token pasting operator replaced."""
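        # For illustration: with "#define PASTE(a,b) a ## b", expanding
        # "PASTE(get, _value)" pastes the two identifiers into the single
        # identifier "get_value".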
        
        i = 1
        replaced = 0
        # Replace parameters first
        while i < len(body):
            if body[i].text not in ['##', '%:%:']:
                i += 1
                continue
            replaced = 1
            try:
                pos = params.index(body[i-1].text)
            except ValueError:
                pass
            else:
                new_tokens = arguments[pos]
                body[i-1:i] = copy_tokens(new_tokens)
                i += len(new_tokens)-1
            try:
                pos = params.index(body[i+1].text)
            except ValueError:
                pass
            else:
                new_tokens = arguments[pos]
                body[i+1:i+2] = copy_tokens(new_tokens)
                i += len(new_tokens)-1
            i += 3
        
        if not replaced:
            return body
        # perform the token pasting
        i = 1
        while i < len(body):
            if body[i].text not in ['##', '%:%:']:
                i += 1
                continue
            if body[i-1].type == IDENTIFIER and body[i+1].type == IDENTIFIER:
                new_type = IDENTIFIER
            else:
                new_type = 'uncertain'
            new_token = Token(new_type, body[i-1].text+body[i+1].text,
                              None, None)
            body[i-1:i+2] = [new_token]
            # no need to adjust i; it is already advanced
        # reclassify uncertain tokens
        for t in body:
            if t.type == 'uncertain':
                raise NotImplementedError, "uncertain token after pasting"
        return body

    def expand_macros(self, tokens):
        """Return a token list with all macros replaced in the tokens
        token list."""
        
        result = []
        tokens = TokenList(tokens)
        while 1:
            t = tokens.get_token()
            if t is None:
                break
            if t.type == IDENTIFIER and t.replacable:
                try:
                    macro = self.defines[t.text]
                except KeyError:
                    pass
                else:
                    body = self.expand_macro(t, tokens)
                    if not body:
                        # the macro expanded to nothing
                        continue
                    t = body[0]
                    tokens.unget_tokens(body[1:])
            result.append(t)
        return result

    def do_undef(self, tokens):
        """Process an #undef directive."""
        if len(tokens) != 1:
            raise SyntaxError, "extra tokens in #undef"
        try:
            del self.defines[tokens[0].text]
        except KeyError:
            pass

    def do_ifdef(self, tokens):
        """Process an #ifdef directive."""
        
        if len(tokens) != 1:
            raise SyntaxError, "extra tokens in #ifdef"
        is_defined = self.defines.has_key(tokens[0].text)
        self.push_condition(is_defined)

    def do_ifndef(self, tokens):
        """Process an #ifndef directive."""
        
        if len(tokens) != 1:
            raise SyntaxError, "extra tokens in #ifndef"
        is_defined = self.defines.has_key(tokens[0].text)
        self.push_condition(not is_defined)

    def do_endif(self, tokens):
        """Process an #endif directive."""
        
        if len(tokens) != 0:
            raise SyntaxError, "extra tokens in #endif"
        self.pop_condition()

    def do_else(self, tokens):
        """Process an #else directive."""
        
        if len(tokens) != 0:
            raise SyntaxError, "extra tokens in #endif"
        self.toggle_condition(1)

    def do_if(self, tokens):
        """Process an #if directive."""
        
        val = self.eval_if(tokens)
        self.push_condition(val)

    def do_elif(self, tokens):
        """Process an #elif directive."""
        
        val = self.eval_if(tokens)
        self.toggle_condition(val)

    def eval_if(self, tokens):
        """Return the value of the expression in a #if or #elif."""

        # Replace defined(ident) with 0 or 1
        self.replace_defined(tokens)

        # Expand all macros
        tokens = self.expand_macros(tokens)

        # Replace all remaining identifiers with 0. Alternative tokens
        # and the boolean literals true/false are not replaced.
        # Processing of new and delete is uncertain.
        for i in range(len(tokens)):
            t = tokens[i]
            if t.type == IDENTIFIER and \
               not alternative_tokens.has_key(t.text) and \
               t.text not in ['true', 'false']:
                tokens[i] = Token(NUMBER, "0", t.file, t.line)
                
        tokens = TokenList(tokens)
        return constant_expression(tokens)

    def replace_defined(self, tokens):
        """Replace all occurrences of the defined construct in tokens."""
        
        pos = 0
        while pos < len(tokens):
            if tokens[pos].text != 'defined':
                pos += 1
                continue
            try:
                next = tokens[pos+1]
                if next.text == '(':
                    # defined ( ident )
                    ident = tokens[pos+2]
                    if tokens[pos+3].text != ')':
                        raise SyntaxError, "Invalid defined()"
                    end = pos + 4
                else:
                    # defined ident
                    ident = tokens[pos+1]
                    end = pos + 2
            except IndexError:
                raise SyntaxError, "Invalid defined()"
            if ident.type != IDENTIFIER:
                raise SyntaxError, "invalid defined()"
            if self.defines.has_key(ident.text):
                val = "1"
            else:
                val = "0"
            tokens[pos:end] = [Token(NUMBER, val, ident.file, ident.line)]

    def push_condition(self, value):
        """Put a new condition with the initial value on the conditional
        inclusion stack."""
        
        stack = self.files[-1].ifstack
        # Inside a section that is already skipped, the whole nested
        # conditional is skipped, whatever its condition evaluates to.
        if stack and stack[-1] <= 0:
            value = -1
        stack.append(value)

    def toggle_condition(self, value):
        """Toggle the topmost value of the inclusion stack to value.
        If a previous section was already included, this does nothing."""
        
        stack = self.files[-1].ifstack
        if not stack:
            raise SyntaxError,"unexpected #else"
        # If the previous alternative was false, the current alternative
        # decides. Otherwise, a previous alternative was taken, and
        # the value stays at -1
        if stack[-1] == 0:
            stack[-1] = value
        else:
            stack[-1] = -1

    def pop_condition(self):
        """Remove the topmost value from the inclusion stack."""
        
        stack = self.files[-1].ifstack
        if not stack:
            raise SyntaxError,"unexpected #endif"
        stack.pop()

def process_command_line(args):
    """Process the cpp(1) command line arguments. Return a Preprocessor."""

    incdirs = ['.']
    defines = []
    while args and args[0][0] == '-':
        arg = args[0]
        del args[0]
        if arg == '-I':
            incdirs.append(args[0])
            del args[0]
        elif arg.startswith('-I'):
            incdirs.append(arg[2:])
        elif arg.startswith('-D'):
            if arg.find('=') != -1:
                key, value = arg[2:].split('=', 1)
                value = [value]
            else:
                key = arg[2:]
                value = []
            defines.append((key, value))
        else:
            raise NotImplementedError, "Argument "+arg

    if len(args) != 1:
        raise NotImplementedError, "Remaining arguments %s" % (args,)

    result = Preprocessor(args[0], incdirs)
    for k, v in defines:
        result.add_define(k, v)

    return result
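
# Typical script invocation (assuming this module is saved as cpp.py):
#   python cpp.py -I/usr/local/include -DDEBUG=1 input.cc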

########### Evaluation of expressions in #if/elif ####################

def expecting(tokens, literals):
    """Return the next token if it is in literals,
    else raise a SyntaxError."""
    t = tokens.get_token()
    if t and t.text in literals:
        return t
    if t:
        tokens.unget_token(t)
    raise SyntaxError,"error in expression, expecting " + repr(literals)

def peek(tokens, literals):
    """Return the next token if it is in literals,
    else return None."""
    t = tokens.get_token()
    if t and t.text in literals:
        return t
    if t:
        tokens.unget_token(t)
    return None

def constant_expression(tokens):
    """Evaluate a constant expression, as it may appear in #if."""
    val = conditional_expression(tokens)
    t = tokens.get_token()
    if t is not None:
        raise SyntaxError, "error at "+t.text
    return val

def conditional_expression(tokens):
    """conditional-expression:
        logical-or-expression
        logical-or-expression ? expression : assignment-expression"""

    # C++ disallows comma expressions and assignment expressions
    # in constant expressions, so this reduces to conditional-expression
    # in each alternative
    
    cond = logical_or_expression(tokens)
    t = peek(tokens, ['?'])
    if t is None:
        return cond
    answer1 = conditional_expression(tokens)
    t = expecting(tokens, [':'])
    answer2 = conditional_expression(tokens)

    if cond:
        return answer1
    else:
        return answer2

def logical_or_expression(tokens):
    """logical-or-expression:
        logical-and-expression
        logical-or-expression || logical-and-expression"""

    val1 = logical_and_expression(tokens)
    while 1:
        t = peek(tokens, ['or','||'])
        if t is None:
            return val1
        val2 = logical_and_expression(tokens)
        val1 = val1 or val2

def logical_and_expression(tokens):
    """logical-and-expression:
        inclusive-or-expression
        logical-and-expression && inclusive-or-expression"""

    val1 = inclusive_or_expression(tokens)
    while 1:
        t = peek(tokens, ['and','&&'])
        if t is None:
            return val1
        val2 = inclusive_or_expression(tokens)
        val1 = val1 and val2

def inclusive_or_expression(tokens):
    """inclusive-or-expression:
        exclusive-or-expression
        inclusive-or-expression | exclusive-or-expression"""

    val1 = exclusive_or_expression(tokens)
    while 1:
        t = peek(tokens, ['bitor', '|'])
        if t is None:
            return val1
        val2 = exclusive_or_expression(tokens)
        val1 = val1 | val2

def exclusive_or_expression(tokens):
    """exclusive-or-expression:
        and-expression
        exclusive-or-expression ^ and-expression"""

    val1 = and_expression(tokens)
    while 1:
        t = peek(tokens, ['xor', '^'])
        if t is None:
            return val1
        val2 = and_expression(tokens)
        val1 = val1 ^ val2

def and_expression(tokens):
    """and-expression:
        equality-expression
        and-expression & equality-expression"""

    val1 = equality_expression(tokens)
    while 1:
        t = peek(tokens, ['bitand', '&'])
        if t is None:
            return val1
        val2 = equality_expression(tokens)
        val1 = val1 & val2

def equality_expression(tokens):
    """equality-expression:
        relational-expression
        equality-expression == relational-expression
        equality-expression != relational-expression"""

    val1 = relational_expression(tokens)
    while 1:
        t = peek(tokens, ['==', '!='])
        if t is None:
            return val1
        val2 = relational_expression(tokens)
        if t.text == '==':
            val1 = val1 == val2
        else:
            val1 = val1 != val2

def relational_expression(tokens):
    """relational-expression:
        shift-expression
        relational-expression < shift-expression
        relational-expression > shift-expression
        relational-expression <= shift-expression
        relational-expression >= shift-expression"""

    val1 = shift_expression(tokens)
    while 1:
        t = peek(tokens, ['<', '>', '<=', '>='])
        if t is None:
            return val1
        val2 = shift_expression(tokens)
        if t.text == '<':
            val1 = val1 < val2
        elif t.text == '>':
            val1 = val1 > val2
        elif t.text == '<=':
            val1 = val1 <= val2
        elif t.text == '>=':
            val1 = val1 >= val2
        else:
            assert 0

def shift_expression(tokens):
    """shift-expression:
        additive-expression
        shift-expression << additive-expression
        shift-expression >> additive-expression"""

    val1 = additive_expression(tokens)
    while 1:
        t = peek(tokens, ['<<', '>>'])
        if t is None:
            return val1
        val2 = additive_expression(tokens)
        if t.text == '<<':
            val1 = val1 << val2
        else:
            val1 = val1 >> val2

def additive_expression(tokens):
    """additive-expression:
        multiplicative-expression
        additive-expression + multiplicative-expression
        additive-expression - multiplicative-expression"""

    val1 = multiplicative_expression(tokens)
    while 1:
        t = peek(tokens, ['+', '-'])
        if t is None:
            return val1
        val2 = multiplicative_expression(tokens)
        if t.text == '+':
            val1 = val1 + val2
        else:
            val1 = val1 - val2

def multiplicative_expression(tokens):
    """multiplicative-expression:
        pm-expression
        multiplicative-expression * pm-expression
        multiplicative-expression / pm-expression
        multiplicative-expression % pm-expression"""
    
    val1 = pm_expression(tokens)
    while 1:
        t = peek(tokens, ['*', '/', '%'])
        if t is None:
            return val1
        val2 = pm_expression(tokens)
        if t.text == '*':
            val1 = val1 * val2
        elif t.text == '/':
            val1 = val1 / val2
        elif t.text == '%':
            val1 = val1 % val2

def pm_expression(tokens):
    """pm-expression:
        cast-expression
        pm-expression .* cast-expression
        pm-expression ->* cast-expression"""

    # In the preprocessor, pointer-to-member cannot occur

    return cast_expression(tokens)

def cast_expression(tokens):
    """cast-expression:
        unary-expression
        ( type-id ) cast-expression"""

    # In the preprocessor, casts cannot occur, since type-id is
    # replaced with 0
    return unary_expression(tokens)

def unary_expression(tokens):
    """unary-expression:
        postfix-expression
        ++ cast-expression
        -- cast-expression
        unary-operator cast-expression
        sizeof unary-expression
        sizeof ( type-id )
        new-expression
        delete-expression"""

    # In a constant expression, increment and decrement are banned.
    # In the preprocessor, sizeof, new, and delete cannot occur, since
    # the keyword is replaced by 0

    t = peek(tokens, ['*', '&', '+', '-', '!', '~', 'not', 'compl'])
    if t is not None:
        val = cast_expression(tokens)
        if t.text in ['*', '&']:
            raise SyntaxError, "Invalid operator "+t.text
        if t.text == '+':
            return val
        elif t.text == '-':
            return -val
        elif t.text in ['!', 'not']:
            return not val
        elif t.text in ['~', 'compl']:
            return ~val
        assert 0
    return postfix_expression(tokens)

def postfix_expression(tokens):
    """postfix-expression:
        primary-expression
        postfix-expression [ expression ]
        postfix-expression ( expression-list-opt )
        simple-type-specifier ( expression-list-opt )
        typename ::opt nested-name-specifier identifier ( expression-list-opt )
        typename ::opt nested-name-specifier templateopt template-id ( expression-list opt )
        postfix-expression . template-opt id-expression
        postfix-expression -> templateopt id-expression
        postfix-expression . pseudo-destructor-name
        postfix-expression -> pseudo-destructor-name
        postfix-expression ++
        postfix-expression --
        dynamic_cast < type-id > ( expression )
        static_cast < type-id > ( expression )
        reinterpret_cast < type-id > ( expression )
        const_cast < type-id > ( expression )
        typeid ( expression )
        typeid ( type-id )"""

    # In the preprocessor, anything involving template, typename, *_cast,
    # or typeid is not available, since the keyword is replaced by 0.
    # Objects and increment, decrement are banned from constant expressions;
    # likewise function calls. XXX: What about "foo"[0]?

    return primary_expression(tokens)

def primary_expression(tokens):
    """primary-expression:
        literal
        this
        ( expression )
        id-expression"""

    # this cannot occur, since it is replaced by 0.
    # id-expression can only expand to identifier, which is replaced by 0.
    # 

    t = tokens.get_token()
    if t is None:
        raise SyntaxError, "error at and of expression"

    if t.text == '(':
        val = conditional_expression(tokens)
        expecting(tokens, [')'])
        return val
    
    if t.type == NUMBER:
        text = t.text
        # strip integer suffixes (u/U/l/L)
        while text and text[-1] in 'uUlL':
            text = text[:-1]
        try:
            # base 0 handles decimal, octal, and hexadecimal literals
            return int(text, 0)
        except ValueError:
            # Floating point literals can only occur if they are cast
            # to integers; however, casts cannot occur here.
            raise SyntaxError, "Floating-point literals not allowed"

    if t.type == IDENTIFIER:
        if t.text == 'true':
            return 1
        elif t.text == 'false':
            return 0
        assert 0

    raise SyntaxError, "Unexpected token "+t.text

################## Filter mode: cpp command line #######################
    
def run():
    """Implement the cpp(1) command."""
    
    import sys
    p = process_command_line(sys.argv[1:])
    file = line = None
    bol = 1
    while 1:
        try:
            t = p.get_token()
        except SyntaxError, s:
            print >>sys.stderr, "syntax error:", s.args[0]
            raise SystemExit
        if t is None:
            return
        if t.type in [INCLUDE_START, INCLUDE_END]:
            continue
        if t.file != file:
            if not bol:
                print
            print '# %d "%s"' % (t.line, t.file)
            file = t.file
            line = t.line
            bol = 1
        elif t.line == line+1:
            print
            line += 1
            bol = 1
        elif line != t.line:
            if not bol:
                print
            print '# %d "%s"' % (t.line, t.file)
            line = t.line
            bol = 1
        print t.text,
        bol = 0
        
if __name__ == '__main__':
    run()
