# -*- coding: utf-8 -*-
# pylint: disable=too-many-lines
from __future__ import print_function
from __future__ import unicode_literals
import logging
from cmakelang import common
from cmakelang import lex
from cmakelang.parse.util import (
is_semantic_token, is_syntactic_token, is_whitespace_token, is_comment_token
)
logger = logging.getLogger(__name__)
class NodeType(common.EnumObject):
  """
  Enumeration for AST nodes
  """
  _id_map = {}


NodeType.BODY = NodeType(0)
NodeType.WHITESPACE = NodeType(1)
NodeType.COMMENT = NodeType(2)
NodeType.STATEMENT = NodeType(3)
NodeType.FLOW_CONTROL = NodeType(4)
NodeType.FUNNAME = NodeType(10)
NodeType.ARGGROUP = NodeType(5)
NodeType.KWARGGROUP = NodeType(6)
NodeType.PARGGROUP = NodeType(14)
NodeType.FLAGGROUP = NodeType(15)
NodeType.PARENGROUP = NodeType(16)
NodeType.ARGUMENT = NodeType(7)
NodeType.KEYWORD = NodeType(8)
NodeType.FLAG = NodeType(9)
NodeType.ONOFFSWITCH = NodeType(11)
NodeType.ATWORDSTATEMENT = NodeType(17)
NodeType.ATWORD = NodeType(18)
# NOTE(josh): These aren't really semantic, but they have structural
# significance that is important in formatting. Since they will have a presence
# in the format tree, we give them a presence in the parse tree as well.
NodeType.LPAREN = NodeType(12)
NodeType.RPAREN = NodeType(13)
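# Hedged usage sketch: assuming EnumObject values behave like singleton enum
# members (one instance per integer id, registered in _id_map), callers
# typically dispatch on node kind with an identity check, e.g.:
#
#   if node.node_type is NodeType.STATEMENT:
#     handle_statement(node)  # handle_statement is a hypothetical helper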


class FlowType(common.EnumObject):
  """
  Enumeration for flow control types
  """
  _id_map = {}


FlowType.IF = FlowType(0)
FlowType.WHILE = FlowType(1)
FlowType.FOREACH = FlowType(2)
FlowType.FUNCTION = FlowType(3)
FlowType.MACRO = FlowType(4)
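# Hedged note: each FlowType corresponds to a block-opening command and its
# matching terminator in CMake (if/endif, while/endwhile, foreach/endforeach,
# function/endfunction, macro/endmacro). Pairing the opener with its closer is
# the parser's job; this enumeration only names the kind of block.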


class TreeNode(object):
  """
  A node in the full-syntax-tree.
  """

  def __init__(self, node_type):
    self.node_type = node_type
    self.children = []
    self.parent = None

  def build_ancestry(self):
    """Recursively assign the .parent member within the subtree."""
    for child in self.children:
      if isinstance(child, TreeNode):
        child.parent = self
        child.build_ancestry()

  def get_location(self):
    """
    Return the source location (line, col, offset) of the first token in the
    subtree rooted at this node, or a null location if the subtree contains
    no tokens.
    """
    if self.children:
      return self.children[0].get_location()
    return lex.SourceLocation((0, 0, 0))

  def count_newlines(self):
    """Return the total number of newlines in the tokens of this subtree."""
    newline_count = 0
    for child in self.children:
      newline_count += child.count_newlines()
    return newline_count

  def __repr__(self):
    return '{}: {}'.format(self.__class__.__name__, self.get_location())

  def get_tokens(self, out=None, kind=None):
    """
    Recursively collect the tokens of this subtree into `out` (a new list if
    none is supplied), filtered by `kind`: one of "semantic", "syntactic",
    "whitespace", "comment", or "all" (the default).
    """
    if out is None:
      out = []
    if kind is None:
      kind = "all"
    match_group = {
        "semantic": is_semantic_token,
        "syntactic": is_syntactic_token,
        "whitespace": is_whitespace_token,
        "comment": is_comment_token,
        "all": lambda x: True
    }[kind]
    for child in self.children:
      if isinstance(child, lex.Token):
        if match_group(child):
          out.append(child)
      elif isinstance(child, TreeNode):
        child.get_tokens(out, kind)
      else:
        raise RuntimeError("Unexpected child of type {}".format(type(child)))
    return out

  def get_semantic_tokens(self, out=None):
    """
    Recursively reconstruct a stream of semantic tokens
    """
    return self.get_tokens(out, kind="semantic")
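# Hedged usage sketch for TreeNode: the tree is normally built by the parser
# in this package (the exact entry point, e.g. cmakelang.parse.parse(), is an
# assumption here, as is the `tokens` variable):
#
#   root = parse(tokens)                   # hypothetical: returns a TreeNode
#   root.build_ancestry()                  # wire up .parent pointers
#   comments = root.get_tokens(kind="comment")
#   semantic = root.get_semantic_tokens()  # same as get_tokens(kind="semantic")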


class ParenBreaker(object):
  """
  Callable that returns true if the supplied token is a right parenthesis.
  """

  def __call__(self, token):
    return token.type == lex.TokenType.RIGHT_PAREN


class KwargBreaker(object):
  """
  Callable that returns true if the supplied token is in the list of keywords,
  ignoring case.
  """

  def __init__(self, kwargs):
    self.kwargs = [kwarg.upper() for kwarg in kwargs]

  def __call__(self, token):
    return token.spelling.upper() in self.kwargs

  def __repr__(self):
    return "KwargBreaker({})".format(",".join(self.kwargs))