File: test_unicode.py

package info (click to toggle)
python-sdjson 0.5.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 560 kB
  • sloc: python: 1,566; makefile: 6; sh: 6
file content (107 lines) | stat: -rw-r--r-- 3,001 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# stdlib
import codecs
from collections import OrderedDict

# 3rd party
import pytest

# this package
import sdjson

# test_encoding1 and test_encoding2 from 2.x are irrelevant (only str
# is supported as input, not bytes).


def test_encoding3() -> None:
	"""Dumping a bare non-ASCII string with default options yields ASCII escapes."""
	encoded = sdjson.dumps("αΩ")
	expected = '"\\u03b1\\u03a9"'
	assert encoded == expected


def test_encoding4() -> None:
	"""Dumping a list holding a non-ASCII string yields ASCII escapes by default."""
	encoded = sdjson.dumps(["αΩ"])
	expected = '["\\u03b1\\u03a9"]'
	assert encoded == expected


def test_encoding5() -> None:
	"""With ``ensure_ascii=False`` a bare non-ASCII string is emitted verbatim."""
	text = "αΩ"
	encoded = sdjson.dumps(text, ensure_ascii=False)
	# Expected output is simply the original text wrapped in double quotes.
	assert encoded == '"' + text + '"'


def test_encoding6() -> None:
	"""With ``ensure_ascii=False`` a non-ASCII string inside a list is emitted verbatim."""
	text = "αΩ"
	encoded = sdjson.dumps([text], ensure_ascii=False)
	# Expected output is the quoted original text inside list brackets.
	assert encoded == '["' + text + '"]'


def test_big_unicode_encode() -> None:
	"""A character outside the BMP dumps as a surrogate-pair escape, or verbatim without ASCII forcing."""
	big_char = '𝄠'
	escaped = sdjson.dumps(big_char)
	assert escaped == '"\\ud834\\udd20"'
	verbatim = sdjson.dumps(big_char, ensure_ascii=False)
	assert verbatim == '"𝄠"'


def test_big_unicode_decode() -> None:
	"""A character outside the BMP loads correctly both verbatim and from a surrogate-pair escape."""
	expected = "z𝄠x"
	# Literal (unescaped) form of the document.
	literal_doc = f'"{expected}"'
	assert sdjson.loads(literal_doc) == expected
	# Escaped form: the same character written as a surrogate pair.
	escaped_doc = '"z\\ud834\\udd20x"'
	assert sdjson.loads(escaped_doc) == expected


def test_unicode_decode() -> None:
	"""
	Check that a four-hex-digit unicode escape decodes to the matching character.

	Covers every code point from U+0000 up to and including U+D7FF, the last
	one before the surrogate block (U+D800-U+DFFF). The surrogate block itself
	is not exercised here.
	"""
	# ``range`` excludes its stop value, so the stop must be 0xd800 for
	# U+D7FF to be included in the sweep.
	for i in range(0, 0xd800):
		u = chr(i)
		s = f'"\\u{i:04x}"'
		assert sdjson.loads(s) == u


def test_unicode_preservation() -> None:
	"""Loaded JSON strings have exactly the type ``str``, at top level and inside a list."""
	# Exact type identity is checked on purpose (not ``isinstance``).
	assert type(sdjson.loads('""')) is str
	assert type(sdjson.loads('"a"')) is str
	first_item = sdjson.loads('["a"]')[0]
	assert type(first_item) is str


def test_bytes_encode() -> None:
	"""Dumping ``bytes``, bare or nested in a list, raises ``TypeError``."""
	for payload in (b"hi", [b"hi"]):
		with pytest.raises(TypeError):
			sdjson.dumps(payload)


def test_bytes_decode() -> None:
	"""
	Loading from ``bytes`` works for each UTF encoding, with and without a BOM.

	Also checks that invalid UTF-8 raises ``UnicodeDecodeError`` and that very
	short byte documents are decoded correctly.
	"""
	# Encoding name -> the byte-order mark that may prefix a document in it.
	bom_by_encoding = {
		"utf-8": codecs.BOM_UTF8,
		"utf-16be": codecs.BOM_UTF16_BE,
		"utf-16le": codecs.BOM_UTF16_LE,
		"utf-32be": codecs.BOM_UTF32_BE,
		"utf-32le": codecs.BOM_UTF32_LE,
		}
	for encoding, bom in bom_by_encoding.items():
		data = ["aµ€𝄠"]
		encoded = sdjson.dumps(data).encode(encoding)
		# The document must load with the BOM prepended...
		with_bom = bom + encoded
		assert sdjson.loads(with_bom) == data
		# ...and also without one.
		assert sdjson.loads(encoded) == data

	# A lone 0x80 byte is not valid UTF-8.
	with pytest.raises(UnicodeDecodeError):
		sdjson.loads(b'["\x80"]')

	# RFC-7159 and ECMA-404 permit a JSON document that is just a string.
	# That shape is a special case not covered by the utf-16-le detection
	# pattern (XX 00 XX 00) given in RFC-4627.
	sun_doc = '"☀"'.encode("utf-16-le")
	assert sdjson.loads(sun_doc) == '☀'

	# Byte inputs shorter than four bytes get special-cased encoding
	# detection: RFC-7159 and ECMA-404 allow single-codepoint documents,
	# which take only two bytes in BOM-less utf-16.
	short_docs = (
		(b"5\x00", 5),
		(b"\x007", 7),
		(b"57", 57),
		)
	for raw, value in short_docs:
		assert sdjson.loads(raw) == value


def test_object_pairs_hook_with_unicode() -> None:
	"""``object_pairs_hook`` receives the key/value pairs in document order and outranks ``object_hook``."""
	s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
	expected_pairs = [
		("xkd", 1),
		("kcw", 2),
		("art", 3),
		("hxm", 4),
		("qrt", 5),
		("pad", 6),
		("hoy", 7),
		]

	# An identity hook exposes the raw list of pairs handed to the hook.
	raw_pairs = sdjson.loads(s, object_pairs_hook=lambda pairs: pairs)
	assert raw_pairs == expected_pairs

	# Using OrderedDict as the hook gives an OrderedDict of exactly that type.
	ordered = sdjson.loads(s, object_pairs_hook=OrderedDict)
	assert ordered == OrderedDict(expected_pairs)
	assert type(ordered) is OrderedDict

	# the object_pairs_hook takes priority over the object_hook
	both_hooks = sdjson.loads(
		s,
		object_pairs_hook=OrderedDict,
		object_hook=lambda obj: None,
		)
	assert both_hooks == OrderedDict(expected_pairs)