# -*- coding: utf-8 -*-
import ast
import token
import tokenize
import unittest

from .context import asttokens


class TestASTTokens(unittest.TestCase):

  def assertTokenizing(self, generate_tokens):
    # Note the two spaces before the comment: they put 'foo' at offset 22,
    # which the startpos assertion below depends on.
    source = "import re  # comment\n\nfoo = 'bar'\n"
    atok = asttokens.ASTTokens(source, tokens=generate_tokens(source))
    self.assertEqual(atok.text, source)
    self.assertEqual([str(t) for t in atok.tokens], [
      "NAME:'import'",
      "NAME:'re'",
      "COMMENT:'# comment'",
      "NEWLINE:'\\n'",
      "NL:'\\n'",
      "NAME:'foo'",
      "OP:'='",
      'STRING:"\'bar\'"',
      "NEWLINE:'\\n'",
      "ENDMARKER:''"
    ])
    self.assertEqual(atok.tokens[5].type, token.NAME)
    self.assertEqual(atok.tokens[5].string, 'foo')
    self.assertEqual(atok.tokens[5].index, 5)
    self.assertEqual(atok.tokens[5].startpos, 22)
    self.assertEqual(atok.tokens[5].endpos, 25)

  def test_tokenizing(self):
    # Test that we produce meaningful tokens on initialization. Passing
    # tokens=None makes ASTTokens run the tokenizer itself.
    self.assertTokenizing(generate_tokens=lambda source: None)

  def test_given_existing_tokens(self):
    # type: () -> None
    # Test that we process a given iterable of tokens on initialization.
    self.was_called = False

    def generate_tokens(source):
      def tokens_iter():
        # Name the loop variable 'tok' to avoid shadowing the 'token' module.
        for tok in asttokens.util.generate_tokens(source):
          yield tok
        # This runs only once the iterable is exhausted, proving it was consumed.
        self.was_called = True
      return tokens_iter()

    self.assertTokenizing(generate_tokens)
    self.assertTrue(self.was_called, "Should have used tokens from given iterable")

  def test_token_methods(self):
    # Test the methods that deal with tokens: prev/next_token, get_token, get_token_from_offset.
    # Two spaces before the comment, as above, so 'foo' starts at offset 22.
    source = "import re  # comment\n\nfoo = 'bar'\n"
    atok = asttokens.ASTTokens(source)
    self.assertEqual(str(atok.tokens[3]), "NEWLINE:'\\n'")
    self.assertEqual(str(atok.tokens[4]), "NL:'\\n'")
    self.assertEqual(str(atok.tokens[5]), "NAME:'foo'")
    self.assertEqual(str(atok.tokens[6]), "OP:'='")
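
    # prev/next_token skip non-coding tokens (comments, NL) unless include_extra=True.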
    self.assertEqual(atok.prev_token(atok.tokens[5]), atok.tokens[3])
    self.assertEqual(atok.prev_token(atok.tokens[5], include_extra=True), atok.tokens[4])
    self.assertEqual(atok.next_token(atok.tokens[5]), atok.tokens[6])
    self.assertEqual(atok.next_token(atok.tokens[1]), atok.tokens[3])
    self.assertEqual(atok.next_token(atok.tokens[1], include_extra=True), atok.tokens[2])
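
    # get_token_from_offset: 'foo' occupies [22, 25); the space at 25 still maps
    # to the 'foo' token, and 26 reaches '='.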
    self.assertEqual(atok.get_token_from_offset(21), atok.tokens[4])
    self.assertEqual(atok.get_token_from_offset(22), atok.tokens[5])
    self.assertEqual(atok.get_token_from_offset(23), atok.tokens[5])
    self.assertEqual(atok.get_token_from_offset(24), atok.tokens[5])
    self.assertEqual(atok.get_token_from_offset(25), atok.tokens[5])
    self.assertEqual(atok.get_token_from_offset(26), atok.tokens[6])
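
    # get_token takes (row, col) positions: columns 0-3 of line 3 map to 'foo'
    # (col 3 is the space after it), and col 4 is '='.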
    self.assertEqual(atok.get_token(2, 0), atok.tokens[4])
    self.assertEqual(atok.get_token(3, 0), atok.tokens[5])
    self.assertEqual(atok.get_token(3, 1), atok.tokens[5])
    self.assertEqual(atok.get_token(3, 2), atok.tokens[5])
    self.assertEqual(atok.get_token(3, 3), atok.tokens[5])
    self.assertEqual(atok.get_token(3, 4), atok.tokens[6])
    self.assertEqual(list(atok.token_range(atok.tokens[4], atok.tokens[6], include_extra=True)),
                     atok.tokens[4:7])

    # Verify that find_token works, including for non-coding tokens.
    self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, 'foo'), atok.tokens[5])
    self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, 'foo', reverse=True),
                     atok.tokens[9])
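    # A search that finds no match ends on tokens[9], the ENDMARKER.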
    self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, reverse=True), atok.tokens[1])
    self.assertEqual(atok.find_token(atok.tokens[5], tokenize.COMMENT), atok.tokens[9])
    self.assertEqual(atok.find_token(atok.tokens[5], tokenize.COMMENT, reverse=True),
                     atok.tokens[2])
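    # A NEWLINE exists ahead of 'foo', so that result is not EOF; no NL does,
    # so that search ends on the ENDMARKER and ISEOF is true.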
    self.assertEqual(atok.find_token(atok.tokens[5], token.NEWLINE), atok.tokens[8])
    self.assertFalse(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NEWLINE).type))
    self.assertEqual(atok.find_token(atok.tokens[5], tokenize.NL), atok.tokens[9])
    self.assertTrue(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NL).type))

  def test_unicode_offsets(self):
    # The ast module provides utf-8 byte offsets, while tokenize uses unicode (character)
    # offsets. Make sure we translate correctly.
    source = "foo('фыва',a,b)\n"
    atok = asttokens.ASTTokens(source)
    self.assertEqual([str(t) for t in atok.tokens], [
      "NAME:'foo'",
      "OP:'('",
      'STRING:"%s"' % repr('фыва').lstrip('u'),
      "OP:','",
      "NAME:'a'",
      "OP:','",
      "NAME:'b'",
      "OP:')'",
      "NEWLINE:'\\n'",
      "ENDMARKER:''"
    ])
    self.assertEqual(atok.tokens[2].startpos, 4)
    self.assertEqual(atok.tokens[2].endpos, 10)   # Counting characters, not bytes
    self.assertEqual(atok.tokens[4].startpos, 11)
    self.assertEqual(atok.tokens[4].endpos, 12)
    self.assertEqual(atok.tokens[6].startpos, 13)
    self.assertEqual(atok.tokens[6].endpos, 14)

    root = ast.parse(source)
    # Verify that the ast parser produces offsets as we expect. This is just to inform the
    # implementation.
    string_node = next(n for n in ast.walk(root) if isinstance(n, ast.Str))
    self.assertEqual(string_node.lineno, 1)
    self.assertEqual(string_node.col_offset, 4)
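    # 'фыва' is 4 characters but 8 bytes in utf-8, so byte-based offsets diverge
    # past the string: 'a' sits at character offset 11 but byte offset 15.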
    a_node = next(n for n in ast.walk(root) if isinstance(n, ast.Name) and n.id == 'a')
    self.assertEqual((a_node.lineno, a_node.col_offset), (1, 15))   # Counting bytes, not chars.
    b_node = next(n for n in ast.walk(root) if isinstance(n, ast.Name) and n.id == 'b')
    self.assertEqual((b_node.lineno, b_node.col_offset), (1, 17))

    # Here we verify that we use correct offsets (translating utf8 to unicode offsets) when
    # extracting text ranges.
    atok.mark_tokens(root)
    self.assertEqual(atok.get_text(string_node), "'фыва'")
    self.assertEqual(atok.get_text(a_node), "a")
    self.assertEqual(atok.get_text(b_node), "b")

  def test_coding_declaration(self):
    """ASTTokens should be able to parse a string with a coding declaration."""
    # In Python 2, a unicode string with a coding declaration is a SyntaxError, but we should
    # be able to parse a byte string with a coding declaration (as long as it's utf-8
    # compatible).
    atok = asttokens.ASTTokens(str("# coding: ascii\n1\n"), parse=True)
    self.assertEqual([str(t) for t in atok.tokens], [
      "COMMENT:'# coding: ascii'",
      "NL:'\\n'",
      "NUMBER:'1'",
      "NEWLINE:'\\n'",
      "ENDMARKER:''"
    ])


def test_filename():
  filename = "myfile.py"
  atok = asttokens.ASTTokens("a", parse=True, filename=filename)
  assert filename == atok.filename


def test_doesnt_have_location():
  atok = asttokens.ASTTokens("a", parse=True)
  # Testing the documentation that says:
  # "Returns (0, 0) for nodes (like `Load`) that don't correspond
  # to any particular text."
  context = atok.tree.body[0].value.ctx
  assert isinstance(context, ast.Load)
  assert atok.get_text_range(context) == (0, 0)
  assert atok.get_text(context) == ""

  # This actually also applies to non-nodes.
  assert atok.get_text_range(None) == (0, 0)
  assert atok.get_text(None) == ""


if __name__ == "__main__":
  unittest.main()