File: test_asttokens.py

Package: python-asttokens 3.0.0-2

# -*- coding: utf-8 -*-
import ast
import token
import tokenize
import unittest
from .context import asttokens

class TestASTTokens(unittest.TestCase):

  def assertTokenizing(self, generate_tokens):
    source = "import re  # comment\n\nfoo = 'bar'\n"
    atok = asttokens.ASTTokens(source, tokens=generate_tokens(source))
    self.assertEqual(atok.text, source)
    self.assertEqual([str(t) for t in atok.tokens], [
      "NAME:'import'",
      "NAME:'re'",
      "COMMENT:'# comment'",
      "NEWLINE:'\\n'",
      "NL:'\\n'",
      "NAME:'foo'",
      "OP:'='",
      'STRING:"\'bar\'"',
      "NEWLINE:'\\n'",
      "ENDMARKER:''"
    ])

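    # Token attributes: .index is the token's position in atok.tokens, and
    # .startpos/.endpos are character offsets into atok.text.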
    self.assertEqual(atok.tokens[5].type, token.NAME)
    self.assertEqual(atok.tokens[5].string, 'foo')
    self.assertEqual(atok.tokens[5].index, 5)
    self.assertEqual(atok.tokens[5].startpos, 22)
    self.assertEqual(atok.tokens[5].endpos, 25)

  def test_tokenizing(self):
    # Test that we produce meaningful tokens on initialization.
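    # The lambda yields tokens=None, so ASTTokens falls back to tokenizing the source itself.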
    self.assertTokenizing(generate_tokens=lambda x: None)

  def test_given_existing_tokens(self):
    # type: () -> None
    # Test that we process a given iterable of tokens on initialization.

    self.was_called = False

    def generate_tokens(source):
      def tokens_iter():
        # Re-yield the stock tokens, then record that this iterable was fully
        # consumed. (An instance attribute avoids needing a `nonlocal`
        # declaration; `tok` avoids shadowing the imported `token` module.)
        for tok in asttokens.util.generate_tokens(source):
          yield tok
        self.was_called = True
      return tokens_iter()

    self.assertTokenizing(generate_tokens)

    self.assertTrue(self.was_called, "Should have used tokens from given iterable")


  def test_token_methods(self):
    # Test the methods that deal with tokens: prev/next_token, get_token, get_token_from_offset.
    source = "import re  # comment\n\nfoo = 'bar'\n"
    atok = asttokens.ASTTokens(source)
    self.assertEqual(str(atok.tokens[3]), "NEWLINE:'\\n'")
    self.assertEqual(str(atok.tokens[4]), "NL:'\\n'")
    self.assertEqual(str(atok.tokens[5]), "NAME:'foo'")
    self.assertEqual(str(atok.tokens[6]), "OP:'='")
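    # prev_token and next_token skip non-coding tokens (COMMENT, NL) unless
    # include_extra=True is passed.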
    self.assertEqual(atok.prev_token(atok.tokens[5]), atok.tokens[3])
    self.assertEqual(atok.prev_token(atok.tokens[5], include_extra=True), atok.tokens[4])
    self.assertEqual(atok.next_token(atok.tokens[5]), atok.tokens[6])
    self.assertEqual(atok.next_token(atok.tokens[1]), atok.tokens[3])
    self.assertEqual(atok.next_token(atok.tokens[1], include_extra=True), atok.tokens[2])

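    # get_token_from_offset addresses tokens by character offset into the source;
    # get_token addresses them by (line, column), with lines 1-based and columns 0-based.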
    self.assertEqual(atok.get_token_from_offset(21), atok.tokens[4])
    self.assertEqual(atok.get_token_from_offset(22), atok.tokens[5])
    self.assertEqual(atok.get_token_from_offset(23), atok.tokens[5])
    self.assertEqual(atok.get_token_from_offset(24), atok.tokens[5])
    self.assertEqual(atok.get_token_from_offset(25), atok.tokens[5])
    self.assertEqual(atok.get_token_from_offset(26), atok.tokens[6])

    self.assertEqual(atok.get_token(2, 0), atok.tokens[4])
    self.assertEqual(atok.get_token(3, 0), atok.tokens[5])
    self.assertEqual(atok.get_token(3, 1), atok.tokens[5])
    self.assertEqual(atok.get_token(3, 2), atok.tokens[5])
    self.assertEqual(atok.get_token(3, 3), atok.tokens[5])
    self.assertEqual(atok.get_token(3, 4), atok.tokens[6])

    self.assertEqual(list(atok.token_range(atok.tokens[4], atok.tokens[6], include_extra=True)),
                     atok.tokens[4:7])
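    # Illustrative addition (not in the original suite): without include_extra,
    # token_range should skip non-coding tokens such as the NL at index 4.
    self.assertEqual(list(atok.token_range(atok.tokens[4], atok.tokens[6])),
                     atok.tokens[5:7])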

    # Verify that find_token works, including for non-coding tokens.
    self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, 'foo'), atok.tokens[5])
    self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, 'foo', reverse=True),
                     atok.tokens[9])
    self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, reverse=True), atok.tokens[1])
    self.assertEqual(atok.find_token(atok.tokens[5], tokenize.COMMENT), atok.tokens[9])
    self.assertEqual(atok.find_token(atok.tokens[5], tokenize.COMMENT, reverse=True),
                     atok.tokens[2])
    self.assertEqual(atok.find_token(atok.tokens[5], token.NEWLINE), atok.tokens[8])
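    # When no matching token follows, find_token returns the ENDMARKER token; ISEOF
    # distinguishes a real match (the NEWLINE above) from that not-found sentinel
    # (the NL search below).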
    self.assertFalse(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NEWLINE).type))
    self.assertEqual(atok.find_token(atok.tokens[5], tokenize.NL), atok.tokens[9])
    self.assertTrue(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NL).type))

  def test_unicode_offsets(self):
    # The ast module reports utf-8 byte offsets, while tokenize uses unicode character
    # offsets. Make sure we translate between the two correctly.
    source = "foo('фыва',a,b)\n"
    atok = asttokens.ASTTokens(source)
    self.assertEqual([str(t) for t in atok.tokens], [
      "NAME:'foo'",
      "OP:'('",
      'STRING:"%s"' % repr('фыва').lstrip('u'),
      "OP:','",
      "NAME:'a'",
      "OP:','",
      "NAME:'b'",
      "OP:')'",
      "NEWLINE:'\\n'",
      "ENDMARKER:''"
    ])
    self.assertEqual(atok.tokens[2].startpos, 4)
    self.assertEqual(atok.tokens[2].endpos, 10)      # Counting characters, not bytes
    self.assertEqual(atok.tokens[4].startpos, 11)
    self.assertEqual(atok.tokens[4].endpos, 12)
    self.assertEqual(atok.tokens[6].startpos, 13)
    self.assertEqual(atok.tokens[6].endpos, 14)

    root = ast.parse(source)

    # Verify that the ast parser produces the offsets we expect. This just documents the
    # behavior that the implementation has to account for.
    # (ast.Str is deprecated since Python 3.8; match the string Constant node directly.)
    string_node = next(n for n in ast.walk(root)
                       if isinstance(n, ast.Constant) and isinstance(n.value, str))
    self.assertEqual(string_node.lineno, 1)
    self.assertEqual(string_node.col_offset, 4)

    a_node = next(n for n in ast.walk(root) if isinstance(n, ast.Name) and n.id == 'a')
    self.assertEqual((a_node.lineno, a_node.col_offset), (1, 15))   # Counting bytes, not chars.

    b_node = next(n for n in ast.walk(root) if isinstance(n, ast.Name) and n.id == 'b')
    self.assertEqual((b_node.lineno, b_node.col_offset), (1, 17))
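    # Illustrative arithmetic (not in the original suite): 'фыва' is 4 characters but
    # 8 utf-8 bytes, which is why ast places 'a' at byte column 15 while tokenize puts
    # it at character offset 11.
    self.assertEqual(len("foo('фыва',"), 11)
    self.assertEqual(len("foo('фыва',".encode('utf8')), 15)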

    # Verify that we translate the utf8 byte offsets above into unicode character
    # offsets when extracting text ranges.
    atok.mark_tokens(root)
    self.assertEqual(atok.get_text(string_node), "'фыва'")
    self.assertEqual(atok.get_text(a_node), "a")
    self.assertEqual(atok.get_text(b_node), "b")

  def test_coding_declaration(self):
    """ASTTokens should be able to parse a string with a coding declaration."""
    # Historically, a Python 2 unicode string with a coding declaration was a SyntaxError,
    # but a byte string with a coding declaration could be parsed (as long as it's
    # utf-8 compatible).
    atok = asttokens.ASTTokens(str("# coding: ascii\n1\n"), parse=True)
    self.assertEqual([str(t) for t in atok.tokens], [
      "COMMENT:'# coding: ascii'",
      "NL:'\\n'",
      "NUMBER:'1'",
      "NEWLINE:'\\n'",
      "ENDMARKER:''"
    ])
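    # Illustrative follow-up (not in the original suite): with parse=True the parsed
    # module is available as atok.tree; the coding comment appears only in the token
    # stream, never in the AST.
    self.assertIsInstance(atok.tree, ast.Module)
    self.assertEqual(len(atok.tree.body), 1)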


def test_filename():
  filename = "myfile.py"
  atok = asttokens.ASTTokens("a", parse=True, filename=filename)
  assert filename == atok.filename
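

# An illustrative sketch (not in the original suite), assuming ASTTokens' signature
# defaults filename to '<unknown>':
def test_default_filename():
  atok = asttokens.ASTTokens("a", parse=True)
  assert atok.filename == "<unknown>"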


def test_doesnt_have_location():
  atok = asttokens.ASTTokens("a", parse=True)

  # Testing the documentation that says:
  # "Returns (0, 0) for nodes (like `Load`) that don't correspond
  #  to any particular text."
  context = atok.tree.body[0].value.ctx
  assert isinstance(context, ast.Load)
  assert atok.get_text_range(context) == (0, 0)
  assert atok.get_text(context) == ""

  # This actually also applies to non-nodes
  assert atok.get_text_range(None) == (0, 0)
  assert atok.get_text(None) == ""
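
  # Illustrative addition (not in the original suite): the Name node itself, by
  # contrast, does map to a real text range.
  name_node = atok.tree.body[0].value
  assert isinstance(name_node, ast.Name)
  assert atok.get_text(name_node) == "a"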


if __name__ == "__main__":
  unittest.main()