File: parse.py

package info (click to toggle)
pyannotate 1.2.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 384 kB
  • sloc: python: 5,001; makefile: 5
file content (341 lines) | stat: -rw-r--r-- 10,667 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
"""Parse type annotations collected at runtime by collect_types and dumped as JSON.

Parse JSON data and also parse type comment strings into type objects.

The collect_types tool is in pyannotate_runtime/collect_types.py.
"""

import json
import re
import sys

from typing import Any, List, Mapping, Set, Tuple
try:
    from typing import Text
except ImportError:
    # In Python 3.5.1 stdlib, typing.py does not define Text
    Text = str  # type: ignore
from mypy_extensions import NoReturn, TypedDict

from pyannotate_tools.annotations.types import (
    AbstractType,
    AnyType,
    ARG_POS,
    ARG_STAR,
    ARG_STARSTAR,
    Argument,
    ClassType,
    TupleType,
    UnionType,
    NoReturnType,
)

# True when running under Python 2. encode() consults this flag to decide
# whether text has to be converted with .encode('ascii') or returned as-is.
PY2 = sys.version_info < (3,)


# Rules for replacing some type names that aren't valid Python names or that
# are otherwise invalid.
#
# Keys are names as they appear in a type comment; values are the text that
# replaces them. tokenize() looks a name up here after removing spaces from
# it, and emits the replacement as a single DottedName token.
TYPE_FIXUPS = {
    # The dictionary-* names come from Python 2 `__class__.__name__` values
    # from `dict.iterkeys()`, etc. Python 3 uses valid names.
    'dictionary-keyiterator': 'Iterator',
    'dictionary-valueiterator': 'Iterator',
    'dictionary-itemiterator': 'Iterator',
    'pyannotate_runtime.collect_types.UnknownType': 'Any',
    # Parser.parse_type() turns 'mypy_extensions.NoReturn' into NoReturnType.
    'pyannotate_runtime.collect_types.NoReturnType': 'mypy_extensions.NoReturn',
    'function': 'Callable',
    'functools.partial': 'Callable',
    'long': 'int',
    'unicode': 'Text',
    'generator': 'Iterator',
    'listiterator': 'Iterator',
    'instancemethod': 'Callable',
    'itertools.imap': 'Iterator',
    'operator.methodcaller': 'Callable',
    'method': 'Callable',
    'method-wrapper': 'Callable',
    'mappingproxy': 'Mapping',
    # NOTE: this replacement contains brackets, but tokenize() does not
    # re-tokenize replacements, so it ends up as one DottedName token.
    'file': 'IO[bytes]',
    'instance': 'Any',
    'collections.defaultdict': 'Dict',
}


# Input JSON data entry
#
# parse_json() expects the JSON file to hold a list of these. It checks each
# of the keys below on every item and converts the item to a FunctionInfo.
RawEntry = TypedDict('RawEntry', {'path': Text,
                                  'line': int,
                                  'func_name': Text,
                                  'type_comments': List[Text],
                                  'samples': int})


class FunctionInfo(object):
    """Runtime-collected type data for one function.

    A plain record whose attributes mirror the keys of a RawEntry: 'path',
    'line', 'func_name', 'type_comments' and 'samples'. The constructor
    stores its arguments unchanged.
    """

    def __init__(self, path, line, func_name, type_comments, samples):
        # type: (str, int, str, List[str], int) -> None
        (self.path,
         self.line,
         self.func_name,
         self.type_comments,
         self.samples) = (path, line, func_name, type_comments, samples)


class ParseError(Exception):
    """Signals that a type comment could not be parsed.

    The offending comment is available as the 'comment' attribute and is
    also embedded in the exception message.
    """

    def __init__(self, comment):
        # type: (str) -> None
        self.comment = comment
        message = 'Invalid type comment: %s' % comment
        super(ParseError, self).__init__(message)


def parse_json(path):
    # type: (str) -> List[FunctionInfo]
    """Deserialize a JSON file containing runtime collected types.

    The input JSON is expected to have a list of RawEntry items. Every item
    is validated (required keys present, values of the expected type) and
    converted to a FunctionInfo; text values are passed through encode().
    The result preserves the order of the items in the file.

    Malformed input is reported by raising AssertionError with a message
    that starts with the file path. Errors raised by open() and json.load()
    propagate unchanged.
    """
    with open(path) as f:
        data = json.load(f)  # type: List[RawEntry]
    result = []  # type: List[FunctionInfo]

    # The helpers raise AssertionError explicitly rather than using the
    # 'assert' statement, so the validation still runs under 'python -O'.

    def assert_type(value, typ):
        # type: (object, type) -> None
        if not isinstance(value, typ):
            raise AssertionError(
                '%s: Unexpected type %r' % (path, type(value).__name__))

    def assert_dict_item(dictionary, key, typ):
        # type: (Mapping[Any, Any], str, type) -> None
        if key not in dictionary:
            raise AssertionError('%s: Missing dictionary key %r' % (path, key))
        value = dictionary[key]
        if not isinstance(value, typ):
            raise AssertionError('%s: Unexpected type %r for key %r' % (
                path, type(value).__name__, key))

    assert_type(data, list)
    for item in data:
        assert_type(item, dict)
        assert_dict_item(item, 'path', Text)
        assert_dict_item(item, 'line', int)
        assert_dict_item(item, 'func_name', Text)
        assert_dict_item(item, 'type_comments', list)
        for comment in item['type_comments']:
            assert_type(comment, Text)
        # Validate 'samples' like every other key, so that a missing key
        # yields the same descriptive error instead of a bare KeyError.
        assert_dict_item(item, 'samples', int)
        info = FunctionInfo(encode(item['path']),
                            item['line'],
                            encode(item['func_name']),
                            [encode(comment) for comment in item['type_comments']],
                            item['samples'])
        result.append(info)
    return result


class Token(object):
    """Common base class for the tokens produced from a type comment.

    Parser compares tokens through their 'text' attribute.
    """
    # Default text; subclasses that carry text assign it per instance.
    text = ''


class DottedName(Token):
    """Token for a (possibly dotted) name, e.g. 'int', 'List' or 'package.name'"""

    def __init__(self, text):
        # type: (str) -> None
        self.text = text

    def __repr__(self):
        # type: () -> str
        template = 'DottedName(%s)'
        return template % self.text


class Separator(Token):
    """Token for punctuation in a type comment, e.g. '(', '[', ',' or '->'"""

    def __init__(self, text):
        # type: (str) -> None
        self.text = text

    def __repr__(self):
        # type: () -> str
        # A separator is displayed as its own text, without any wrapper.
        shown = self.text
        return shown


class End(Token):
    """Marker token placed after the last real token of a type comment.

    It keeps the empty 'text' inherited from Token.
    """

    def __repr__(self):
        # type: () -> str
        return 'End()'


def tokenize(s):
    # type: (str) -> List[Token]
    """Split a type comment string into a flat list of tokens.

    Spaces between tokens are skipped. The returned list always finishes
    with an End token. ParseError (carrying the complete comment) is raised
    when the remaining input does not start with any recognized token.
    """
    comment = s  # Kept intact for error reporting; 's' is consumed below
    result = []  # type: List[Token]
    while s:
        head = s[0]
        if head == ' ':
            s = s[1:]
            continue
        if head in '()[],*':
            result.append(Separator(head))
            s = s[1:]
            continue
        if s[:2] == '->':
            result.append(Separator('->'))
            s = s[2:]
            continue
        # Anything else has to be a name, possibly with '.' or ':' parts.
        match = re.match(r'[-\w]+(\s*(\.|:)\s*[-/\w]*)*', s)
        if not match:
            raise ParseError(comment)
        matched = match.group(0)
        name = matched.replace(' ', '')
        # Substitute known problematic names (no-op when there is no rule).
        name = TYPE_FIXUPS.get(name, name)
        # pytz creates classes with the name of the timezone being used:
        # https://github.com/stub42/pytz/blob/f55399cddbef67c56db1b83e0939ecc1e276cf42/src/pytz/tzfile.py#L120-L123
        # Such a name (e.g. "pytz.tzfile.America/Los_Angeles") contains a
        # `/` and is not a valid class name, so map it to the base class.
        if name.startswith('pytz.tzfile.'):
            name = 'datetime.tzinfo'
        if '-' in name or '/' in name:
            # Still not a valid Python name. Many sources generate these,
            # so fall back to Any instead of crashing.
            name = 'Any'
        result.append(DottedName(name))
        s = s[len(matched):]
    result.append(End())
    return result


def parse_type_comment(comment):
    # type: (str) -> Tuple[List[Argument], AbstractType]
    """Parse a function type comment such as '(arg1, ..., argN) -> ret'.

    Returns the (arguments, return type) pair produced by Parser.parse().
    """
    parser = Parser(comment)
    return parser.parse()


class Parser(object):
    """Implementation of the type comment parser

    The comment is tokenized on construction; the parse methods then consume
    the tokens left to right, using 'self.i' as the cursor. Every syntax
    error is reported by raising ParseError with the original comment.
    """

    def __init__(self, comment):
        # type: (str) -> None
        self.comment = comment
        self.tokens = tokenize(comment)
        self.i = 0  # Index of the next unconsumed token

    def parse(self):
        # type: () -> Tuple[List[Argument], AbstractType]
        """Parse a whole comment of form '(arg1, ..., argN) -> ret'.

        Returns the list of arguments and the return type. An argument may be
        prefixed with '*' or '**'; at most one of each is accepted, '*' must
        come before '**', and no plain argument may follow a starred one.
        Arguments must be separated by commas (a trailing comma is allowed).
        """
        self.expect('(')
        arg_types = []  # type: List[Argument]
        stars_seen = set()  # type: Set[str]
        while self.lookup() != ')':
            if self.lookup() == '*':
                self.expect('*')
                if self.lookup() == '*':
                    # '**' argument: only one is allowed
                    if '**' in stars_seen:
                        self.fail()
                    self.expect('*')
                    star_star = True
                else:
                    # '*' argument: must come before any other starred one
                    if stars_seen:
                        self.fail()
                    star_star = False
                arg_type = self.parse_type()
                if star_star:
                    arg_types.append(Argument(arg_type, ARG_STARSTAR))
                    stars_seen.add('**')
                else:
                    arg_types.append(Argument(arg_type, ARG_STAR))
                    stars_seen.add('*')
            else:
                # Positional arguments can't follow '*' or '**' arguments
                if stars_seen:
                    self.fail()
                arg_type = self.parse_type()
                arg_types.append(Argument(arg_type, ARG_POS))
            if self.lookup() == ',':
                self.expect(',')
            elif self.lookup() != ')':
                # Reject a missing comma between arguments, such as
                # '(int str)', the same way parse_type_list() does.
                self.fail()
        self.expect(')')
        self.expect('->')
        ret_type = self.parse_type()
        # Nothing may follow the return type
        if not isinstance(self.next(), End):
            self.fail()
        return arg_types, ret_type

    def parse_type_list(self):
        # type: () -> List[AbstractType]
        """Parse comma-separated types up to (not including) ')' or ']'.

        The list may be empty and may have a trailing comma.
        """
        types = []
        while self.lookup() not in (')', ']'):
            typ = self.parse_type()
            types.append(typ)
            if self.lookup() == ',':
                self.expect(',')
            elif self.lookup() not in (')', ']'):
                self.fail()
        return types

    def parse_type(self):
        # type: () -> AbstractType
        """Parse a single type starting at the current token.

        'Any' and 'mypy_extensions.NoReturn' map to AnyType and NoReturnType,
        'Tuple[...]' to TupleType and 'Union[...]' to UnionType. A union of
        one item collapses to that item and an empty union is an error.
        'Optional[X]' becomes a union of X and ClassType('None'). Any other
        name becomes a ClassType, with type arguments if '[...]' follows.
        """
        t = self.next()
        if not isinstance(t, DottedName):
            self.fail()
        if t.text == 'Any':
            return AnyType()
        elif t.text == 'mypy_extensions.NoReturn':
            return NoReturnType()
        elif t.text == 'Tuple':
            self.expect('[')
            args = self.parse_type_list()
            self.expect(']')
            return TupleType(args)
        elif t.text == 'Union':
            self.expect('[')
            items = self.parse_type_list()
            self.expect(']')
            if len(items) == 1:
                return items[0]
            elif len(items) == 0:
                self.fail()
            else:
                return UnionType(items)
        else:
            if self.lookup() == '[':
                self.expect('[')
                args = self.parse_type_list()
                self.expect(']')
                if t.text == 'Optional' and len(args) == 1:
                    return UnionType([args[0], ClassType('None')])
                return ClassType(t.text, args)
            else:
                return ClassType(t.text)

    def expect(self, s):
        # type: (str) -> None
        """Consume the current token, failing unless its text equals 's'."""
        if self.tokens[self.i].text != s:
            self.fail()
        self.i += 1

    def lookup(self):
        # type: () -> str
        """Return the text of the current token without consuming it."""
        return self.tokens[self.i].text

    def next(self):
        # type: () -> Token
        """Consume and return the current token."""
        token = self.tokens[self.i]
        self.i += 1
        return token

    def fail(self):
        # type: () -> NoReturn
        """Raise ParseError for the comment being parsed."""
        raise ParseError(self.comment)


def encode(s):
    # type: (Text) -> str
    """Return 's' as a native 'str'.

    When PY2 is set the text is encoded as ASCII; otherwise it is returned
    unchanged.
    """
    return s.encode('ascii') if PY2 else s