1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
|
# -*- coding: utf-8 -*-
"""
***************************************************************************
parsing.py
---------------------
Copyright : (C) 2013 by CS Systemes d'information (CS SI)
Email : otb at c-s dot fr (CS SI)
Contributors : Julien Malik (CS SI)
Oscar Picas (CS SI)
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************
"""
__author__ = 'Julien Malik, Oscar Picas'
__copyright__ = '(C) 2013, CS Systemes d\'information (CS SI)'
from collections import namedtuple
import re
def merge_pairs(list, should_merge, merge):
    """
    Collapse neighbouring elements of a sequence pairwise.

    The sequence is walked from left to right.  Whenever the current
    element and its successor satisfy should_merge(a, b), the two are
    replaced by the single value merge(a, b) and the walk resumes after
    the pair; otherwise the current element is kept unchanged.  A merged
    value is never itself considered for another merge.

    Returns a new list; the input sequence is only read.
    """
    merged = []
    last = len(list) - 1
    pos = 0
    while pos < last:
        left, right = list[pos], list[pos + 1]
        if should_merge(left, right):
            merged.append(merge(left, right))
            pos += 2
            continue
        merged.append(left)
        pos += 1
    # A final element that was not consumed by a merge is carried over.
    if pos == last:
        merged.append(list[pos])
    return merged
QuotedString = namedtuple('QuotedString', 'contents comments')
_Arg = namedtuple('Arg', 'contents comments')
_Command = namedtuple('Command', 'name body comment')
BlankLine = namedtuple('BlankLine', '')
class File(list):
    """A list of top-level syntax-tree items with a labelled repr."""

    def __repr__(self):
        # Copy into a plain list so the built-in list repr renders the
        # items instead of re-entering this method.
        items = list(self)
        return 'File(%s)' % repr(items)
class Comment(str):
    """A comment from the source file; behaves as the string of its text."""

    def __repr__(self):
        # str() exists on both Python 2 and Python 3.  The Python-2-only
        # unicode() builtin used previously is a NameError on Python 3 and
        # could fail on non-ASCII byte strings on Python 2.
        return 'Comment(' + str(self) + ')'
def Arg(contents, comments=None):
    """
    Build an Arg record.

    A missing or empty comments value is replaced by a fresh empty list,
    so records created without comments never share a list.
    """
    if not comments:
        comments = []
    return _Arg(contents=contents, comments=comments)
def Command(name, body, comment=None):
    """
    Build a Command record from its name, its body and an optional
    comment (None when omitted).
    """
    return _Command(name=name, body=body, comment=comment)
class CMakeParseError(Exception):
    """Error type used by this module to report input it cannot parse."""
def prettify(s):
    """
    Parse the CMakeLists text s and return the text form of the result.

    s is handed to parse(); the File object it returns is converted to
    text.  Any exception raised while parsing propagates unchanged.
    """
    # Formatting into a unicode literal yields a unicode object on
    # Python 2 and a str on Python 3.  The unicode() builtin used
    # previously does not exist on Python 3.
    return u'{0}'.format(parse(s))
def parse(s):
    '''
    Parses the string s, given in CMakeLists format, into a File.

    The text is tokenized, the tokens are grouped into top-level items,
    comments are attached to commands where applicable, and the line
    number information used along the way is dropped from the result.
    '''
    numbered_nodes = attach_comments_to_commands(
        list(parse_file(tokenize(s))))
    # Each entry is a (line numbers, node) pair; only the node is kept.
    return File([node for _, node in numbered_nodes])
def parse_file(toks):
    '''
    Yields (line numbers, node) pairs for the top-level elements of a
    CMakeLists file, given its tokens.

    toks has to be a one-shot iterator such as a generator, not a list:
    it is handed to parse_command, which pulls a command's remaining
    tokens from it, and this loop then carries on after them.

    A comment token yields a Comment, a newline token that directly
    follows another newline token (or starts the input) yields a
    BlankLine, and a word token starts a command.  Other tokens yield
    nothing.
    '''
    previous = 'newline'
    for lineno, (kind, text) in toks:
        if kind == 'word':
            span, command = parse_command(lineno, text, toks)
            yield (span, command)
        elif kind == 'comment':
            yield ([lineno], Comment(text))
        elif kind == 'newline' and previous == 'newline':
            yield ([lineno], BlankLine())
        previous = kind
def attach_comments_to_commands(nodes):
    """
    Return a new list of (line numbers, node) pairs in which every
    adjacent pair accepted by command_then_comment has been merged by
    attach_comment_to_command.
    """
    return merge_pairs(
        nodes,
        should_merge=command_then_comment,
        merge=attach_comment_to_command,
    )
def command_then_comment(a, b):
    """
    Predicate on two (line numbers, node) pairs: truthy when a holds a
    command, b holds a comment and the two share at least one line.

    Returns False when either node has the wrong type, otherwise the
    (possibly empty) set of shared line numbers.
    """
    lines_a, node_a = a
    lines_b, node_b = b
    if not isinstance(node_a, _Command):
        return False
    if not isinstance(node_b, Comment):
        return False
    return set(lines_a).intersection(lines_b)
def attach_comment_to_command(lnums_command, lnums_comment):
    """
    Merge a (line numbers, command) pair with a (line numbers, comment)
    pair.

    Returns the command's line numbers together with a new Command that
    has the same name, a shallow copy of the body, and the given comment.
    The comment's own line numbers are discarded.
    """
    span, original = lnums_command
    _, comment = lnums_comment
    rebuilt = Command(name=original.name,
                      body=original.body[:],
                      comment=comment)
    return span, rebuilt
def parse_command(start_line_num, command_name, toks):
    """
    Parse the rest of a command whose name token has just been read.

    start_line_num -- line number of the command name token
    command_name   -- text of the command name token
    toks           -- token iterator positioned right after the name; it
                      is consumed up to and including the closing paren

    Returns (line_nums, command), where line_nums is the list of line
    numbers from start_line_num through the line of the closing paren.

    Raises CMakeParseError if the name is not followed by a left paren
    or if the tokens run out before the closing paren, and ValueError if
    a left paren appears inside the argument list.
    """
    cmd = Command(name=command_name, body=[], comment=None)
    expect('left paren', toks)
    for line_num, (typ, tok_contents) in toks:
        if typ == 'right paren':
            # list() keeps this a real list on Python 3 as well, where
            # range() alone returns a lazy range object.
            line_nums = list(range(start_line_num, line_num + 1))
            return line_nums, cmd
        elif typ == 'left paren':
            raise ValueError('Unexpected left paren at line %s' % line_num)
        elif typ in ('word', 'string'):
            cmd.body.append(Arg(tok_contents, []))
        elif typ == 'comment':
            if cmd.body:
                # Attach the comment to the argument that precedes it.
                cmd.body[-1].comments.append(tok_contents)
            else:
                # No argument yet.  The Command tuple has a single
                # `comment` field and no `comments` list, and tuples are
                # immutable, so build an updated copy.  Several leading
                # comments are joined with newlines.
                # NOTE: attach_comment_to_command replaces this field if
                # a trailing comment is attached to the command later.
                if cmd.comment is None:
                    leading = tok_contents
                else:
                    leading = cmd.comment + '\n' + tok_contents
                cmd = cmd._replace(comment=Comment(leading))
    msg = 'File ended while processing command "%s" started at line %s' % (
        command_name, start_line_num)
    raise CMakeParseError(msg)
def expect(expected_type, toks):
    """
    Consume the next token from toks and check that it has the type
    expected_type.

    toks is an iterator of (line_num, (token_type, token_contents))
    pairs.  Returns None on success.

    Raises CMakeParseError if the token has a different type or if toks
    is already exhausted.
    """
    try:
        # The next() builtin works on Python 2.6+ and Python 3; the
        # .next() method exists only on Python 2 iterators.
        line_num, (typ, tok_contents) = next(toks)
    except StopIteration:
        # Do not let StopIteration escape: inside a calling generator it
        # would end the iteration silently (Python 2) or surface as a
        # RuntimeError (Python 3.7+).
        raise CMakeParseError(
            'Expected a %s, but the file ended' % expected_type)
    if typ != expected_type:
        msg = 'Expected a %s, but got "%s" at line %s' % (
            expected_type, tok_contents, line_num)
        raise CMakeParseError(msg)
# Tokenizer table, after
# http://stackoverflow.com/questions/691148/pythonic-way-to-implement-a-tokenizer
# Each entry pairs a pattern with an action returning (token_type, text);
# an action of None discards the matched text.
scanner = re.Scanner([
    (r'#.*', lambda scanner, token: ("comment", token)),
    (r'"[^"]*"', lambda scanner, token: ("string", token)),
    (r"\(", lambda scanner, token: ("left paren", token)),
    (r"\)", lambda scanner, token: ("right paren", token)),
    (r'[^ \t\r\n()#"]+', lambda scanner, token: ("word", token)),
    (r'\n', lambda scanner, token: ("newline", token)),
    # Skip other whitespace.  The class deliberately excludes '\n': a
    # plain \s+ would also swallow a newline that follows trailing spaces
    # or tabs, so no newline token would be produced for that line end.
    (r"[^\S\n]+", None),
])
def tokenize(s):
    """
    Yields pairs of the form (line_num, (token_type, token_contents))
    given a string containing the contents of a CMakeLists file.

    line_num is the 1-based line on which the token starts, and
    token_contents has surrounding whitespace stripped.

    Raises ValueError, when iteration starts, if part of s cannot be
    tokenized; the message names the line where scanning stopped.
    """
    toks, remainder = scanner.scan(s)
    if remainder != '':
        # remainder is the unscanned tail of s, so everything before it
        # was consumed; count the newlines there to locate the failure
        # instead of always reporting line 1.
        consumed = len(s) - len(remainder)
        line_num = s.count('\n', 0, consumed) + 1
        msg = 'Unrecognized tokens at line %s: %s' % (line_num, remainder)
        raise ValueError(msg)
    line_num = 1
    for tok_type, tok_contents in toks:
        yield line_num, (tok_type, tok_contents.strip())
        # Count after yielding so a token that contains newlines is
        # reported at the line it starts on.
        line_num += tok_contents.count('\n')
|