1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
from pypy.interpreter.pyparser import parsestring
import py, sys
class TestParsetring:
def parse_and_compare(self, literal, value, encoding=None):
space = self.space
w_ret = parsestring.parsestr(space, encoding, literal)
if isinstance(value, str):
assert space.type(w_ret) == space.w_bytes
assert space.bytes_w(w_ret) == value
elif isinstance(value, unicode):
assert space.type(w_ret) == space.w_unicode
assert space.unicode_w(w_ret) == value
else:
assert False
def test_simple(self):
space = self.space
for s in ['hello world', 'hello\n world']:
self.parse_and_compare('b' + repr(s), s)
self.parse_and_compare("b'''hello\\x42 world'''", 'hello\x42 world')
# octal
self.parse_and_compare(r'b"\0"', chr(0))
self.parse_and_compare(r'br"\0"', '\\0')
self.parse_and_compare(r'rb"\0"', '\\0')
self.parse_and_compare(r'b"\07"', chr(7))
self.parse_and_compare(r'b"\123"', chr(0123))
self.parse_and_compare(r'b"\400"', chr(0))
self.parse_and_compare(r'b"\9"', '\\' + '9')
self.parse_and_compare(r'b"\08"', chr(0) + '8')
# hexadecimal
self.parse_and_compare(r'b"\xfF"', chr(0xFF))
self.parse_and_compare(r'b"\""', '"')
self.parse_and_compare(r"b'\''", "'")
for s in (r'b"\x"', r'b"\x7"', r'b"\x7g"'):
space.raises_w(space.w_ValueError,
parsestring.parsestr, space, None, s)
# only ASCII characters are allowed in bytes literals (but of course
# you can use escapes to get the non-ASCII ones (note that in the
# second case we use a raw string, the the parser actually sees the
# chars '\' 'x' 'e' '9'
space.raises_w(space.w_SyntaxError,
parsestring.parsestr, space, None, "b'\xe9'")
self.parse_and_compare(r"b'\xe9'", chr(0xE9))
def test_unicode(self):
space = self.space
for s in ['hello world', 'hello\n world']:
self.parse_and_compare(repr(s), unicode(s))
self.parse_and_compare("'''hello\\x42 world'''",
u'hello\x42 world')
self.parse_and_compare("'''hello\\u0842 world'''",
u'hello\u0842 world')
s = "u'\x81'"
s = s.decode("koi8-u").encode("utf8")[1:]
w_ret = parsestring.parsestr(self.space, 'koi8-u', s)
ret = space.unwrap(w_ret)
assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'")
def test_unicode_pep414(self):
space = self.space
for s in [u'hello world', u'hello\n world']:
self.parse_and_compare(repr(s), unicode(s))
self.parse_and_compare("u'''hello\\x42 world'''",
u'hello\x42 world')
self.parse_and_compare("u'''hello\\u0842 world'''",
u'hello\u0842 world')
space.raises_w(space.w_ValueError,
parsestring.parsestr, space, None, "ur'foo'")
def test_unicode_literals(self):
space = self.space
w_ret = parsestring.parsestr(space, None, repr("hello"))
assert space.isinstance_w(w_ret, space.w_unicode)
w_ret = parsestring.parsestr(space, None, "b'hi'")
assert space.isinstance_w(w_ret, space.w_bytes)
w_ret = parsestring.parsestr(space, None, "r'hi'")
assert space.isinstance_w(w_ret, space.w_unicode)
def test_raw_unicode_literals(self):
space = self.space
w_ret = parsestring.parsestr(space, None, "r'\u'")
assert space.int_w(space.len(w_ret)) == 2
def test_bytes(self):
space = self.space
b = "b'hello'"
w_ret = parsestring.parsestr(space, None, b)
assert space.unwrap(w_ret) == "hello"
b = "b'''hello'''"
w_ret = parsestring.parsestr(space, None, b)
assert space.unwrap(w_ret) == "hello"
def test_simple_enc_roundtrip(self):
space = self.space
s = "'\x81\\t'"
s = s.decode("koi8-u").encode("utf8")
w_ret = parsestring.parsestr(self.space, 'koi8-u', s)
ret = space.unwrap(w_ret)
assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81\\t'")
def test_multiline_unicode_strings_with_backslash(self):
space = self.space
s = '"""' + '\\' + '\n"""'
w_ret = parsestring.parsestr(space, None, s)
assert space.str_w(w_ret) == ''
def test_bug1(self):
space = self.space
expected = ['x', ' ', chr(0xc3), chr(0xa9), ' ', '\n']
input = ["'", 'x', ' ', chr(0xc3), chr(0xa9), ' ', chr(92), 'n', "'"]
w_ret = parsestring.parsestr(space, 'utf8', ''.join(input))
assert space.str_w(w_ret) == ''.join(expected)
def test_wide_unicode_in_source(self):
if sys.maxunicode == 65535:
py.test.skip("requires a wide-unicode host")
self.parse_and_compare('"\xf0\x9f\x92\x8b"',
unichr(0x1f48b),
encoding='utf-8')
def test_decode_unicode_utf8(self):
buf = parsestring.decode_unicode_utf8(self.space,
'u"\xf0\x9f\x92\x8b"', 2, 6)
if sys.maxunicode == 65535:
assert buf == r"\U0000d83d\U0000dc8b"
else:
assert buf == r"\U0001f48b"
|