File: test_tokenize.py

# -*- coding: utf-8 -*-    # This file contains Unicode characters.

from io import StringIO
from textwrap import dedent

import pytest

from jedi._compatibility import u, is_py3
from jedi.parser.token import NAME, OP, NEWLINE, STRING, INDENT
from jedi.parser import ParserWithRecovery, load_grammar, tokenize


from ..helpers import unittest


class TokenTest(unittest.TestCase):
    def test_end_pos_one_line(self):
        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
        def testit():
            a = "huhu"
        ''')))
        tok = parsed.module.subscopes[0].statements[0].children[2]
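        # The string literal "huhu" ends on line 3, column 14 (just past the
        # closing quote).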
        assert tok.end_pos == (3, 14)

    def test_end_pos_multi_line(self):
        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
        def testit():
            a = """huhu
        asdfasdf""" + "h"
        ''')))
        tok = parsed.module.subscopes[0].statements[0].children[2].children[0]
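        # The triple-quoted string spans lines 3-4 and ends on line 4,
        # column 11, right after the closing quotes of `asdfasdf"""`.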
        assert tok.end_pos == (4, 11)

    def test_simple_no_whitespace(self):
        # Test a simple one line string, no preceding whitespace
        simple_docstring = u('"""simple one line docstring"""')
        simple_docstring_io = StringIO(simple_docstring)
        tokens = tokenize.generate_tokens(simple_docstring_io.readline)
        token_list = list(tokens)
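        # Each token is a (type, value, start_pos, prefix) tuple; the prefix
        # carries the whitespace that preceded the token.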
        _, value, _, prefix = token_list[0]
        assert prefix == ''
        assert value == '"""simple one line docstring"""'

    def test_simple_with_whitespace(self):
        # Test a simple one line string with preceding whitespace and newline
        simple_docstring = u('  """simple one line docstring""" \r\n')
        simple_docstring_io = StringIO(simple_docstring)
        tokens = tokenize.generate_tokens(simple_docstring_io.readline)
        token_list = list(tokens)
        assert token_list[0][0] == INDENT
        typ, value, start_pos, prefix = token_list[1]
        assert prefix == '  '
        assert value == '"""simple one line docstring"""'
        assert typ == STRING
        typ, value, start_pos, prefix = token_list[2]
        assert prefix == ' '
        assert typ == NEWLINE

    def test_function_whitespace(self):
        # Test function definition whitespace identification
        fundef = dedent(u('''
        def test_whitespace(*args, **kwargs):
            x = 1
            if x > 0:
                print(True)
        '''))
        fundef_io = StringIO(fundef)
        tokens = tokenize.generate_tokens(fundef_io.readline)
        token_list = list(tokens)
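        # A token's prefix should contain exactly the whitespace between it
        # and the previous token.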
        for _, value, _, prefix in token_list:
            if value == 'test_whitespace':
                assert prefix == ' '
            if value == '(':
                assert prefix == ''
            if value == '*':
                assert prefix == ''
            if value == '**':
                assert prefix == ' '
            if value == 'print':
                assert prefix == '        '
            if value == 'if':
                assert prefix == '    '

    def test_identifier_contains_unicode(self):
        fundef = dedent(u('''
        def 我あφ():
            pass
        '''))
        fundef_io = StringIO(fundef)
        tokens = tokenize.generate_tokens(fundef_io.readline)
        token_list = list(tokens)
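        # token_list[1] is the identifier that follows the `def` keyword.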
        unicode_token = token_list[1]
        if is_py3:
            assert unicode_token[0] == NAME
        else:
            # Unicode tokens in Python 2 seem to be identified as operators.
            # They will be ignored by the parser, which is fine.
            assert unicode_token[0] == OP

    def test_quoted_strings(self):

        string_tokens = [
            'u"test"',
            'u"""test"""',
            'U"""test"""',
            "u'''test'''",
            "U'''test'''",
        ]

        for s in string_tokens:
            parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
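            # Drill down: module -> simple_stmt -> expr_stmt; the string
            # literal is the third child of the assignment (after `a` and `=`).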
            simple_stmt = parsed.module.children[0]
            expr_stmt = simple_stmt.children[0]
            assert len(expr_stmt.children) == 3
            string_tok = expr_stmt.children[2]
            assert string_tok.type == 'string'
            assert string_tok.value == s
            assert string_tok.eval() == 'test'


def test_tokenizer_with_string_literal_backslash():
    import jedi
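    # The analyzed source contains a string literal split over two lines by a
    # backslash; its value is still 'foo', and goto_definitions on the
    # trailing `statement` should resolve to it.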
    c = jedi.Script("statement = u'foo\\\n'; statement").goto_definitions()
    assert c[0]._name.parent.obj == 'foo'


def test_ur_literals():
    """
    We decided to parse `u''` literals regardless of the Python version. This
    probably makes sense:

    - Python 3.2 doesn't support them, and while Jedi still supports
      Python 3.2, it might not for much longer. Parsing them there is
      technically incorrect, but only for one old version that matters less
      and less.
    - All the other Python versions work fine with them.
    """
    def check(literal):
        io = StringIO(u(literal))
        tokens = tokenize.generate_tokens(io.readline)
        token_list = list(tokens)
        typ, result_literal, _, _ = token_list[0]
        assert typ == STRING
        assert result_literal == literal

    check('u""')
    check('ur""')
    check('Ur""')
    check('UR""')
    check('bR""')
    # The prefix characters must be in the right order: 'bR' tokenizes as a
    # string, 'Rb' does not.
    with pytest.raises(AssertionError):
        check('Rb""')