1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
|
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
#
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.
#
# A fork of `parso.utils`.
# https://github.com/davidhalter/parso/blob/master/parso/utils.py
#
# The following changes were made:
# - Drop Python 2 compatibility layer
# - Use dataclasses instead of namedtuple
# - Apply type hints directly to files
# - Make PythonVersionInfo directly usable in hashmaps
# - Unroll total ordering because Pyre doesn't understand it
import re
import sys
from ast import literal_eval
from dataclasses import dataclass
from typing import Optional, Sequence, Tuple, Union
# The following characters are treated as line breaks by str.splitlines, but
# not by Python itself. In Python source code only \r (Carriage Return, 0xD)
# and \n (Line Feed, 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
    "\v",  # Vertical Tabulation (0x0B)
    "\f",  # Form Feed (0x0C)
    "\x1C",  # File Separator
    "\x1D",  # Group Separator
    "\x1E",  # Record Separator
    "\x85",  # Next Line (NEL), equivalent to CR+LF.
    # NEL is used to mark end-of-line on some IBM mainframes.
    "\u2028",  # Line Separator
    "\u2029",  # Paragraph Separator
)
@dataclass(frozen=True)
class Version:
    """
    Immutable record of a three-component version number.

    The dataclass is frozen, so instances cannot be modified after creation.
    """

    # First version component.
    major: int
    # Second version component.
    minor: int
    # Third version component.
    micro: int
def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
    r"""
    Split Python source text into lines.

    Unlike :py:meth:`str.splitlines`, only ``\n``, ``\r\n`` and ``\r`` are
    treated as line terminators. Form feeds and the other characters listed in
    ``_NON_LINE_BREAKS`` are ordinary text and stay inside their line.

    An input that is empty or that ends with a line terminator yields a
    trailing ``""`` entry, so ``""`` is split into ``[""]``.

    :param string: The text to split.
    :param keepends: If true, every returned line keeps its terminator.
    :return: The lines of ``string``.
    """
    if not keepends:
        return re.split(r"\n|\r\n|\r", string)

    pieces = string.splitlines(True)

    # str.splitlines also cuts after characters that are not line ends in
    # Python code. Remember which pieces end that way so they can be glued
    # back onto the piece that follows them.
    glue_points = [
        pos
        for pos, piece in enumerate(pieces)
        if piece and piece[-1] in _NON_LINE_BREAKS
    ]

    # Work from the back so that the earlier positions stay valid.
    for pos in glue_points[::-1]:
        successor = pos + 1
        if successor < len(pieces):
            pieces[pos : successor + 1] = [pieces[pos] + pieces[successor]]
        # Otherwise the piece is the last one and there is nothing to merge.

    # The stdlib is inconsistent about a trailing empty string depending on
    # whether keepends is used; always add it here to match the other branch.
    if string == "" or string.endswith(("\n", "\r")):
        pieces.append("")
    return pieces
def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = "utf-8", errors: str = "strict"
) -> str:
    """
    Decode Python source code to ``str``, honouring a UTF-8 BOM and PEP 263
    encoding declarations.

    ``str`` input is returned unchanged. For ``bytes`` input the encoding is
    chosen in this order: a leading UTF-8 byte-order mark, a ``coding[=:]``
    declaration within the first two lines, and finally ``encoding``.

    :param source: The source code, either already decoded or as raw bytes.
    :param encoding: Fallback encoding used when the source declares none.
        See :py:meth:`bytes.decode` documentation.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    :return: The decoded source text. A leading BOM is not stripped.
    :raises LookupError: If the selected encoding is unknown, unless
        ``errors`` is ``'replace'`` (then UTF-8 is used instead).
    :raises UnicodeDecodeError: If decoding fails and ``errors`` is
        ``'strict'``.
    """

    def detect_encoding(data: bytes) -> Union[str, bytes]:
        """
        For the implementation of encoding definitions in Python, look at:
        - http://www.python.org/dev/peps/pep-0263/
        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        if data.startswith(b"\xef\xbb\xbf"):
            # UTF-8 byte-order mark
            return b"utf-8"

        # Only the first two lines may carry an encoding declaration.
        first_two_match = re.match(rb"(?:[^\n]*\n){0,2}", data)
        if first_two_match is None:
            return encoding
        first_two_lines = first_two_match.group(0)

        possible_encoding = re.search(rb"coding[=:]\s*([-\w.]+)", first_two_lines)
        if possible_encoding:
            return possible_encoding.group(1)
        # the default if nothing else has been set -> PEP 263
        return encoding

    if isinstance(source, str):
        # only cast bytes
        return source

    actual_encoding = detect_encoding(source)
    if not isinstance(actual_encoding, str):
        actual_encoding = actual_encoding.decode("utf-8", "replace")

    try:
        # Cast to str
        return source.decode(actual_encoding, errors)
    except LookupError:
        if errors == "replace":
            # The declared encoding does not exist (e.g. `# coding: foo-8`).
            # The caller asked for lenient decoding, so fall back to UTF-8
            # instead of failing on a bogus declaration.
            return source.decode("utf-8", errors)
        raise
@dataclass(frozen=True)
class PythonVersionInfo:
    """
    Immutable ``(major, minor)`` Python version.

    Instances can be compared with other objects exposing ``major`` and
    ``minor`` attributes and with plain 2-tuples such as ``(3, 6)``. Comparing
    with a tuple of any other length raises :py:exc:`ValueError`. For any
    other operand the comparison methods return ``NotImplemented``, so ``==``
    evaluates to ``False`` and ordering raises :py:exc:`TypeError`.

    The hash equals the hash of the ``(major, minor)`` tuple, which keeps it
    consistent with equality against tuples.

    The comparison methods are spelled out one by one instead of using
    ``functools.total_ordering`` because Pyre doesn't understand it.
    """

    major: int
    minor: int

    @staticmethod
    def _as_pair(other: object) -> Optional[Tuple[int, int]]:
        """
        Return ``other`` as a ``(major, minor)`` pair, or ``None`` if it is
        not something a version can be compared with.

        :raises ValueError: If ``other`` is a tuple whose length is not 2.
        """
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return other
        # Duck-typed: anything with ``major`` and ``minor`` is accepted.
        major = getattr(other, "major", None)
        minor = getattr(other, "minor", None)
        if major is None or minor is None:
            return None
        return (major, minor)

    def __gt__(self, other: object) -> bool:
        pair = self._as_pair(other)
        if pair is None:
            return NotImplemented
        return (self.major, self.minor) > pair

    def __ge__(self, other: object) -> bool:
        pair = self._as_pair(other)
        if pair is None:
            return NotImplemented
        return (self.major, self.minor) >= pair

    def __lt__(self, other: object) -> bool:
        pair = self._as_pair(other)
        if pair is None:
            return NotImplemented
        return (self.major, self.minor) < pair

    def __le__(self, other: object) -> bool:
        pair = self._as_pair(other)
        if pair is None:
            return NotImplemented
        return (self.major, self.minor) <= pair

    def __eq__(self, other: object) -> bool:
        pair = self._as_pair(other)
        if pair is None:
            return NotImplemented
        return (self.major, self.minor) == pair

    def __ne__(self, other: object) -> bool:
        result = self.__eq__(other)
        # NotImplemented must be passed through, not negated.
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self) -> int:
        return hash((self.major, self.minor))
def _parse_version(version: str) -> PythonVersionInfo:
    """
    Turn a version string such as ``"3"``, ``"3.2"`` or ``"2.7.1"`` into a
    :class:`PythonVersionInfo`.

    A third component is accepted but discarded. When only the major version
    is given, a default minor version is chosen for it.

    :raises ValueError: If ``version`` does not look like a version number.
    :raises NotImplementedError: If only a major version is given and it is
        neither 2 nor 3.
    """
    parsed = re.match(r"(\d+)(?:\.(\d+)(?:\.\d+)?)?$", version)
    if parsed is None:
        raise ValueError(
            (
                "The given version is not in the right format. "
                + 'Use something like "3.2" or "3".'
            )
        )

    major_text, minor_text = parsed.groups()
    major = int(major_text)
    if minor_text is not None:
        return PythonVersionInfo(major, int(minor_text))

    # Use the latest Python in case it's not exactly defined, because the
    # grammars are typically backwards compatible?
    default_minors = {2: 7, 3: 6}
    if major not in default_minors:
        raise NotImplementedError(
            "Sorry, no support yet for those fancy new/old versions."
        )
    return PythonVersionInfo(major, default_minors[major])
def parse_version_string(version: Optional[str] = None) -> PythonVersionInfo:
    """
    Validate a version number (e.g. `3.2` or `2.7.1` or `3`) and return the
    matching version info, which always holds exactly a major and a minor
    component.

    :param version: The version string to parse. When omitted or ``None``, the
        major and minor version of the running interpreter are used.
    """
    if version is not None:
        return _parse_version(version)

    running = sys.version_info[:2]
    return _parse_version("%s.%s" % running)
|