1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
|
# -*- coding: utf-8 -*-
# :Project: python-rapidjson -- Unicode tests
# :Author: John Anderson <sontek@gmail.com>
# :License: MIT License
# :Copyright: © 2015 John Anderson
# :Copyright: © 2016, 2017, 2018, 2020 Lele Gaifax
#
import json
import pytest
import rapidjson
@pytest.mark.parametrize('u', [
'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}',
'\U0010ffff',
'asdf \U0010ffff \U0001ffff qwert \uffff \u10ff \u00ff \u0080 \u7fff \b\n\r',
])
def test_unicode(u, dumps, loads):
s = u.encode('utf-8')
ju = dumps(u)
js = dumps(s)
assert ju == js
assert ju.lower() == json.dumps(u).lower()
assert dumps(u, ensure_ascii=False) == json.dumps(u, ensure_ascii=False)
@pytest.mark.parametrize('o', [
"\ud80d",
{"foo": "\ud80d"},
{"\ud80d": "foo"},
])
def test_dump_surrogate(o, dumps):
with pytest.raises(UnicodeEncodeError, match="surrogates not allowed"):
dumps(o)
@pytest.mark.parametrize('j', [
'"\\ud80d"',
'{"foo": "\\ud80d"}',
'{"\\ud80d": "foo"}',
])
def test_load_surrogate(j, loads):
with pytest.raises(ValueError, match="surrogate pair in string is invalid"):
loads(j)
@pytest.mark.parametrize('j', [
'"\\udc00"',
'"\\udfff"',
])
def test_unicode_decode_error(j, loads):
with pytest.raises(rapidjson.JSONDecodeError,
match="The surrogate pair in string is invalid."):
loads(j)
def test_non_utf8_bytes(dumps):
value = b'\xff\xf0'
with pytest.raises(UnicodeDecodeError, match="'utf-8' codec can't decode byte"):
dumps(value)
|