# Tests for the pytokens tokenizer.
from pytokens import tokenize, Token, TokenType as T
def test_tokenize() -> None:
    """tokenize() yields the exact expected token stream for tricky inputs.

    NOTE(review): multi-space indents inside the source strings were
    reconstructed from the asserted byte offsets (e.g. the indent token
    spanning 11-15 implies four spaces) -- confirm against the original file.
    """
    cases = [
        # Exponent float under an indented block.
        (
            "def foo():\n    7.e1\n",
            [
                Token(T.identifier, 0, 3, start_line=1, start_col=0, end_line=1, end_col=3),
                Token(T.whitespace, 3, 4, start_line=1, start_col=3, end_line=1, end_col=4),
                Token(T.identifier, 4, 7, start_line=1, start_col=4, end_line=1, end_col=7),
                Token(T.lparen, 7, 8, start_line=1, start_col=7, end_line=1, end_col=8),
                Token(T.rparen, 8, 9, start_line=1, start_col=8, end_line=1, end_col=9),
                Token(T.op, 9, 10, start_line=1, start_col=9, end_line=1, end_col=10),
                Token(T.newline, 10, 11, start_line=1, start_col=10, end_line=1, end_col=11),
                Token(T.indent, 11, 15, start_line=2, start_col=0, end_line=2, end_col=4),
                Token(T.number, 15, 19, start_line=2, start_col=4, end_line=2, end_col=8),
                Token(T.newline, 19, 20, start_line=2, start_col=8, end_line=2, end_col=9),
                Token(T.dedent, 20, 20, start_line=3, start_col=0, end_line=3, end_col=0),
                Token(T.endmarker, 20, 20, start_line=3, start_col=0, end_line=3, end_col=0),
            ],
        ),
        # Bare carriage return between braces.
        # https://github.com/psf/black/issues/3700
        (
            "{\r}",
            [
                Token(T.lbrace, 0, 1, start_line=1, start_col=0, end_line=1, end_col=1),
                Token(T.whitespace, 1, 2, start_line=1, start_col=1, end_line=1, end_col=2),
                Token(T.rbrace, 2, 3, start_line=1, start_col=2, end_line=1, end_col=3),
                Token(T.newline, 3, 4, start_line=1, start_col=3, end_line=1, end_col=4),
                Token(T.endmarker, 4, 4, start_line=2, start_col=0, end_line=2, end_col=0),
            ],
        ),
        # Non-ASCII identifiers: offsets count codepoints, not bytes.
        (
            "€€, x🐍y = 1, 2",
            [
                Token(T.identifier, 0, 2, start_line=1, start_col=0, end_line=1, end_col=2),
                Token(T.op, 2, 3, start_line=1, start_col=2, end_line=1, end_col=3),
                Token(T.whitespace, 3, 4, start_line=1, start_col=3, end_line=1, end_col=4),
                Token(T.identifier, 4, 7, start_line=1, start_col=4, end_line=1, end_col=7),
                Token(T.whitespace, 7, 8, start_line=1, start_col=7, end_line=1, end_col=8),
                Token(T.op, 8, 9, start_line=1, start_col=8, end_line=1, end_col=9),
                Token(T.whitespace, 9, 10, start_line=1, start_col=9, end_line=1, end_col=10),
                Token(T.number, 10, 11, start_line=1, start_col=10, end_line=1, end_col=11),
                Token(T.op, 11, 12, start_line=1, start_col=11, end_line=1, end_col=12),
                Token(T.whitespace, 12, 13, start_line=1, start_col=12, end_line=1, end_col=13),
                Token(T.number, 13, 14, start_line=1, start_col=13, end_line=1, end_col=14),
                Token(T.newline, 14, 15, start_line=1, start_col=14, end_line=1, end_col=15),
                Token(T.endmarker, 15, 15, start_line=2, start_col=0, end_line=2, end_col=0),
            ],
        ),
        # Raw f-string: \N is middle text, {42} is a replacement field.
        (
            r'''rf"\N{42}"''',
            [
                Token(T.fstring_start, 0, 3, start_line=1, start_col=0, end_line=1, end_col=3),
                Token(T.fstring_middle, 3, 5, start_line=1, start_col=3, end_line=1, end_col=5),
                Token(T.lbrace, 5, 6, start_line=1, start_col=5, end_line=1, end_col=6),
                Token(T.number, 6, 8, start_line=1, start_col=6, end_line=1, end_col=8),
                Token(T.rbrace, 8, 9, start_line=1, start_col=8, end_line=1, end_col=9),
                Token(T.fstring_end, 9, 10, start_line=1, start_col=9, end_line=1, end_col=10),
                Token(T.newline, 10, 11, start_line=1, start_col=10, end_line=1, end_col=11),
                Token(T.endmarker, 11, 11, start_line=2, start_col=0, end_line=2, end_col=0),
            ],
        ),
    ]
    for source, expected in cases:
        assert list(tokenize(source)) == expected, repr(source)
def test_weird_op_case() -> None:
    """Carriage returns after a blank first line get the expected token kinds."""
    cases = [
        # "\r" directly after "#": swallowed into the comment token.
        (
            "\n#\r0",
            [
                Token(T.nl, 0, 1, start_line=1, start_col=0, end_line=1, end_col=1),
                Token(T.comment, 1, 4, start_line=2, start_col=0, end_line=2, end_col=3),
                Token(T.nl, 4, 5, start_line=2, start_col=3, end_line=2, end_col=4),
                Token(T.endmarker, 5, 5, start_line=3, start_col=0, end_line=3, end_col=0),
            ],
        ),
        # "\r" before a number: treated as whitespace.
        (
            "\n\r0",
            [
                Token(T.nl, 0, 1, start_line=1, start_col=0, end_line=1, end_col=1),
                Token(T.whitespace, 1, 2, start_line=2, start_col=0, end_line=2, end_col=1),
                Token(T.number, 2, 3, start_line=2, start_col=1, end_line=2, end_col=2),
                Token(T.newline, 3, 4, start_line=2, start_col=2, end_line=2, end_col=3),
                Token(T.endmarker, 4, 4, start_line=3, start_col=0, end_line=3, end_col=0),
            ],
        ),
    ]
    for source, expected in cases:
        assert list(tokenize(source)) == expected, repr(source)
def test_nested_f_tstrings() -> None:
    """An f-string nested inside a t-string's replacement field tokenizes cleanly."""
    source = '''t"foo {f'bar'} baz"'''
    expected = [
        Token(T.tstring_start, 0, 2, start_line=1, start_col=0, end_line=1, end_col=2),
        Token(T.tstring_middle, 2, 6, start_line=1, start_col=2, end_line=1, end_col=6),
        Token(T.lbrace, 6, 7, start_line=1, start_col=6, end_line=1, end_col=7),
        # Inner f-string: its own start/middle/end triplet inside the braces.
        Token(T.fstring_start, 7, 9, start_line=1, start_col=7, end_line=1, end_col=9),
        Token(T.fstring_middle, 9, 12, start_line=1, start_col=9, end_line=1, end_col=12),
        Token(T.fstring_end, 12, 13, start_line=1, start_col=12, end_line=1, end_col=13),
        Token(T.rbrace, 13, 14, start_line=1, start_col=13, end_line=1, end_col=14),
        Token(T.tstring_middle, 14, 18, start_line=1, start_col=14, end_line=1, end_col=18),
        Token(T.tstring_end, 18, 19, start_line=1, start_col=18, end_line=1, end_col=19),
        Token(T.newline, 19, 20, start_line=1, start_col=19, end_line=1, end_col=20),
        Token(T.endmarker, 20, 20, start_line=2, start_col=0, end_line=2, end_col=0),
    ]
    assert list(tokenize(source)) == expected
def test_dedent_after_escaped_nl() -> None:
    """Dedent token placement when a backslash-continuation sits inside a block.

    All four sources share the prefix ``if True:\\n    if \\\\\\nTrue:\\n        pass``
    and differ only in what follows the inner ``pass``.

    NOTE(review): the 4/8-space indents inside the source strings were
    reconstructed from the asserted byte offsets (indent tokens 9-13 and
    24-32) -- confirm against the original file.
    """
    prefix_tokens = [
        Token(T.identifier, 0, 2, start_line=1, start_col=0, end_line=1, end_col=2),
        Token(T.whitespace, 2, 3, start_line=1, start_col=2, end_line=1, end_col=3),
        Token(T.identifier, 3, 7, start_line=1, start_col=3, end_line=1, end_col=7),
        Token(T.op, 7, 8, start_line=1, start_col=7, end_line=1, end_col=8),
        Token(T.newline, 8, 9, start_line=1, start_col=8, end_line=1, end_col=9),
        Token(T.indent, 9, 13, start_line=2, start_col=0, end_line=2, end_col=4),
        Token(T.identifier, 13, 15, start_line=2, start_col=4, end_line=2, end_col=6),
        Token(T.whitespace, 15, 16, start_line=2, start_col=6, end_line=2, end_col=7),
        # The escaped newline is whitespace spanning onto the next line.
        Token(T.whitespace, 16, 18, start_line=2, start_col=7, end_line=3, end_col=0),
        Token(T.identifier, 18, 22, start_line=3, start_col=0, end_line=3, end_col=4),
        Token(T.op, 22, 23, start_line=3, start_col=4, end_line=3, end_col=5),
        Token(T.newline, 23, 24, start_line=3, start_col=5, end_line=3, end_col=6),
        Token(T.indent, 24, 32, start_line=4, start_col=0, end_line=4, end_col=8),
        Token(T.identifier, 32, 36, start_line=4, start_col=8, end_line=4, end_col=12),
        Token(T.newline, 36, 37, start_line=4, start_col=12, end_line=4, end_col=13),
    ]
    cases = [
        # One dedent before each trailing statement.
        (
            "if True:\n    if \\\nTrue:\n        pass\n    pass\npass",
            prefix_tokens
            + [
                Token(T.whitespace, 37, 41, start_line=5, start_col=0, end_line=5, end_col=4),
                Token(T.dedent, 41, 41, start_line=5, start_col=4, end_line=5, end_col=4),
                Token(T.identifier, 41, 45, start_line=5, start_col=4, end_line=5, end_col=8),
                Token(T.newline, 45, 46, start_line=5, start_col=8, end_line=5, end_col=9),
                Token(T.whitespace, 46, 46, start_line=6, start_col=0, end_line=6, end_col=0),
                Token(T.dedent, 46, 46, start_line=6, start_col=0, end_line=6, end_col=0),
                Token(T.identifier, 46, 50, start_line=6, start_col=0, end_line=6, end_col=4),
                Token(T.newline, 50, 51, start_line=6, start_col=4, end_line=6, end_col=5),
                Token(T.endmarker, 51, 51, start_line=7, start_col=0, end_line=7, end_col=0),
            ],
        ),
        # Source ends right after the middle pass: final dedent at EOF.
        (
            "if True:\n    if \\\nTrue:\n        pass\n    pass",
            prefix_tokens
            + [
                Token(T.whitespace, 37, 41, start_line=5, start_col=0, end_line=5, end_col=4),
                Token(T.dedent, 41, 41, start_line=5, start_col=4, end_line=5, end_col=4),
                Token(T.identifier, 41, 45, start_line=5, start_col=4, end_line=5, end_col=8),
                Token(T.newline, 45, 46, start_line=5, start_col=8, end_line=5, end_col=9),
                Token(T.dedent, 46, 46, start_line=6, start_col=0, end_line=6, end_col=0),
                Token(T.endmarker, 46, 46, start_line=6, start_col=0, end_line=6, end_col=0),
            ],
        ),
        # Dedent straight back to column 0: two dedents, zero-width whitespace.
        (
            "if True:\n    if \\\nTrue:\n        pass\npass",
            prefix_tokens
            + [
                Token(T.whitespace, 37, 37, start_line=5, start_col=0, end_line=5, end_col=0),
                Token(T.dedent, 37, 37, start_line=5, start_col=0, end_line=5, end_col=0),
                Token(T.dedent, 37, 37, start_line=5, start_col=0, end_line=5, end_col=0),
                Token(T.identifier, 37, 41, start_line=5, start_col=0, end_line=5, end_col=4),
                Token(T.newline, 41, 42, start_line=5, start_col=4, end_line=5, end_col=5),
                Token(T.endmarker, 42, 42, start_line=6, start_col=0, end_line=6, end_col=0),
            ],
        ),
        # EOF immediately after the inner block: both dedents at EOF.
        (
            "if True:\n    if \\\nTrue:\n        pass\n",
            prefix_tokens
            + [
                Token(T.dedent, 37, 37, start_line=5, start_col=0, end_line=5, end_col=0),
                Token(T.dedent, 37, 37, start_line=5, start_col=0, end_line=5, end_col=0),
                Token(T.endmarker, 37, 37, start_line=5, start_col=0, end_line=5, end_col=0),
            ],
        ),
    ]
    for source, expected in cases:
        assert list(tokenize(source)) == expected, repr(source)