File: utils.py

package info (click to toggle)
python-libcst 1.4.0-1.2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,928 kB
  • sloc: python: 76,235; makefile: 10; sh: 2
file content (218 lines) | stat: -rw-r--r-- 7,565 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
#
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.
#
# A fork of `parso.utils`.
# https://github.com/davidhalter/parso/blob/master/parso/utils.py
#
# The following changes were made:
# - Drop Python 2 compatibility layer
# - Use dataclasses instead of namedtuple
# - Apply type hints directly to files
# - Make PythonVersionInfo directly usable in hashmaps
# - Unroll total ordering because Pyre doesn't understand it


import re
import sys
from ast import literal_eval
from dataclasses import dataclass
from typing import Optional, Sequence, Tuple, Union

# Characters that :py:meth:`str.splitlines` treats as line boundaries, but
# that are not line breaks in Python source code. In Python only \r (Carriage
# Return, 0xD) and \n (Line Feed, 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
    "\v",  # Vertical Tabulation 0xB
    "\f",  # Form Feed 0xC
    "\x1C",  # File Separator
    "\x1D",  # Group Separator
    "\x1E",  # Record Separator
    "\x85",  # Next Line (NEL - equivalent to CR+LF; used to mark
    # end-of-line on some IBM mainframes)
    "\u2028",  # Line Separator
    "\u2029",  # Paragraph Separator
)


@dataclass(frozen=True)
class Version:
    """
    Immutable three-component version number (``major.minor.micro``).
    """

    major: int
    minor: int
    micro: int


def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
    r"""
    Split Python source text into lines the way Python itself does.

    Unlike :py:meth:`str.splitlines`, only ``\n``, ``\r\n`` and ``\r`` count
    as line endings; form feeds and the other characters listed in
    ``_NON_LINE_BREAKS`` stay inside the line they appear in.
    Also different: a trailing line ending is followed by an empty last
    entry, and an empty input yields ``[""]``.

    :param keepends: When true, every line keeps its trailing line ending.
    """
    if not keepends:
        return re.split(r"\n|\r\n|\r", string)

    pieces = string.splitlines(True)

    # str.splitlines also breaks after form feeds and similar characters.
    # Glue every such piece back onto its successor. Walking right to left
    # keeps the not-yet-visited indices valid while entries are removed; the
    # last piece has no successor, so the walk starts one before it.
    for pos in range(len(pieces) - 2, -1, -1):
        if pieces[pos].endswith(_NON_LINE_BREAKS):
            pieces[pos] += pieces.pop(pos + 1)

    # With keepends, str.splitlines omits the empty entry after a trailing
    # line ending (and returns nothing at all for ""). Add it back so both
    # modes agree on the number of lines.
    if string == "" or string.endswith(("\n", "\r")):
        pieces.append("")
    return pieces


def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = "utf-8", errors: str = "strict"
) -> str:
    """
    Checks for a UTF-8 BOM and PEP 263 encoding declarations. Then returns a
    unicode object like in :py:meth:`bytes.decode`. ``str`` input is returned
    unchanged. A leading BOM is not stripped from the decoded result.

    :param source: The source code; only ``bytes`` are actually decoded.
    :param encoding: Fallback encoding that is used when the source neither
        starts with a UTF-8 BOM nor declares an encoding in its first two
        lines. See :py:meth:`bytes.decode` documentation.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    :raises LookupError: If the detected encoding is unknown to Python,
        unless ``errors`` is ``'replace'``, in which case the source is
        decoded as UTF-8 instead.
    """

    def detect_encoding() -> Union[str, bytes]:
        """
        For the implementation of encoding definitions in Python, look at:
        - http://www.python.org/dev/peps/pep-0263/
        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        # Evaluates to the three bytes b"\xef\xbb\xbf".
        byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
        if source.startswith(byte_mark):
            # UTF-8 byte-order mark
            return b"utf-8"

        # Only the first two lines may carry an encoding declaration.
        # pyre-ignore Pyre can't see that Union[str, bytes] conforms to AnyStr.
        first_two_match = re.match(rb"(?:[^\n]*\n){0,2}", source)
        if first_two_match is None:
            # The pattern can match the empty string, so this branch only
            # narrows the Optional for the type checker.
            return encoding
        first_two_lines = first_two_match.group(0)
        possible_encoding = re.search(rb"coding[=:]\s*([-\w.]+)", first_two_lines)
        if possible_encoding:
            return possible_encoding.group(1)
        else:
            # the default if nothing else has been set -> PEP 263
            return encoding

    if isinstance(source, str):
        # only cast bytes
        return source

    actual_encoding = detect_encoding()
    if not isinstance(actual_encoding, str):
        actual_encoding = actual_encoding.decode("utf-8", "replace")

    try:
        # Cast to str
        return source.decode(actual_encoding, errors)
    except LookupError:
        if errors == "replace":
            # The declared encoding does not exist (e.g. `# coding: foo-8`).
            # The caller asked for lenient decoding, so fall back to UTF-8
            # instead of failing on a bogus declaration.
            return source.decode("utf-8", errors)
        raise


@dataclass(frozen=True)
class PythonVersionInfo:
    """
    A ``(major, minor)`` Python version that can be compared against other
    version objects and against plain tuples of length 2.

    All six rich comparisons are written out explicitly. ``__hash__`` is the
    hash of the equivalent ``(major, minor)`` tuple, so an instance and a
    tuple that compare equal are interchangeable as dictionary keys.

    Comparing against a tuple whose length is not 2 raises ``ValueError``.
    Comparing against an object that is neither a tuple nor something with
    ``major`` and ``minor`` attributes yields ``NotImplemented``, i.e.
    ``==`` is ``False``, ``!=`` is ``True`` and ordering raises ``TypeError``.
    """

    major: int
    minor: int

    @staticmethod
    def _comparison_key(other: object) -> Optional[Tuple[int, int]]:
        """
        Return the ``(major, minor)`` pair ``other`` stands for, or ``None``
        if ``other`` cannot be compared to a version.

        :raises ValueError: If ``other`` is a tuple whose length is not 2.
        """
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (other[0], other[1])

        # Anything exposing `major` and `minor` is accepted, not only
        # PythonVersionInfo instances.
        try:
            return (getattr(other, "major"), getattr(other, "minor"))
        except AttributeError:
            return None

    def __gt__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) > key

    def __ge__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) >= key

    def __lt__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) < key

    def __le__(self, other: Union["PythonVersionInfo", Tuple[int, int]]) -> bool:
        key = self._comparison_key(other)
        if key is None:
            return NotImplemented
        return (self.major, self.minor) <= key

    def __eq__(self, other: object) -> bool:
        key = self._comparison_key(other)
        if key is None:
            # Let Python fall back to its default, so that comparing with
            # e.g. None is simply False instead of an AttributeError.
            return NotImplemented
        return (self.major, self.minor) == key

    def __ne__(self, other: object) -> bool:
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return NotImplemented
        return not equal

    def __hash__(self) -> int:
        return hash((self.major, self.minor))


def _parse_version(version: str) -> PythonVersionInfo:
    """
    Turn a version string such as ``"3"``, ``"3.8"`` or ``"3.8.1"`` into a
    :class:`PythonVersionInfo`. A micro component is accepted but discarded.

    :raises ValueError: If ``version`` does not have one of those shapes.
    :raises NotImplementedError: If only a major version is given and it is
        neither 2 nor 3.
    """
    parsed = re.match(r"(\d+)(?:\.(\d+)(?:\.\d+)?)?$", version)
    if parsed is None:
        raise ValueError(
            "The given version is not in the right format. "
            + 'Use something like "3.2" or "3".'
        )

    major_text, minor_text = parsed.groups()
    major = int(major_text)
    if minor_text is not None:
        return PythonVersionInfo(major, int(minor_text))

    # Use the latest Python in case it's not exactly defined, because the
    # grammars are typically backwards compatible?
    default_minor = {2: 7, 3: 6}
    if major not in default_minor:
        raise NotImplementedError(
            "Sorry, no support yet for those fancy new/old versions."
        )
    return PythonVersionInfo(major, default_minor[major])


def parse_version_string(version: Optional[str] = None) -> PythonVersionInfo:
    """
    Validate a version number (e.g. `3.2` or `2.7.1` or `3`) and return the
    matching version info, which only carries the major and minor parts.

    Without an argument, the version of the running interpreter is used.
    """
    if version is not None:
        return _parse_version(version)

    running_major, running_minor = sys.version_info[:2]
    return _parse_version("%s.%s" % (running_major, running_minor))