File: test_c_codecs.py

package info (click to toggle)
pypy3 7.3.19%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (145 lines) | stat: -rw-r--r-- 4,716 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import py
import pytest
from pypy.module._multibytecodec.c_codecs import getcodec, codecs
from pypy.module._multibytecodec.c_codecs import decode, encode
from pypy.module._multibytecodec.c_codecs import EncodeDecodeError
from pypy.module._multibytecodec import c_codecs


def test_codecs_existence():
    """Every name listed in ``codecs`` resolves; an unknown name raises KeyError."""
    for codec_name in codecs:
        assert getcodec(codec_name)
    with pytest.raises(KeyError):
        getcodec("foobar")

def test_decode_gbk(space):
    """Decode a two-byte GBK sequence and a plain ASCII string."""
    gbk = getcodec("gbk")
    # decode() hands back UTF-8 encoded bytes, hence the .encode('utf8').
    em_dash_utf8 = unichr(0x2014).encode('utf8')
    assert decode(space, gbk, "\xA1\xAA") == em_dash_utf8
    # ASCII input comes back unchanged.
    assert decode(space, gbk, "foobar") == "foobar"

@pytest.mark.parametrize('undecodable', [
    b"abc\x80\x80\xc1\xc4",
    # bpo-29990
    b"\xff\x30\x81\x30",
    b"\x81\x30\xff\x30",
])
def test_decode_gb18030_error(space, undecodable):
    """Each listed byte string must be rejected by the gb18030 decoder."""
    gb18030 = getcodec("gb18030")
    pytest.raises(EncodeDecodeError, decode, space, gb18030, undecodable)

def test_decode_hz(space):
    """Decode HZ input that switches into GB mode with ``~{``."""
    # HZ is a stateful encoding.
    hz = getcodec("hz")
    decoded = decode(space, hz, "~{abc}")
    assert decoded.decode('utf8') == u'\u5f95\u6cef'
    # A lone mode switch produces no output at all.
    assert decode(space, hz, "~{") == u''

def test_decodeex_hz(space):
    """Decode several HZ inputs through one explicitly managed decoder buffer.

    The buffer obtained from ``pypy_cjk_dec_new`` is released in a
    ``finally`` clause, so a failing assertion no longer skips the
    ``pypy_cjk_dec_free`` call.
    """
    c = getcodec("hz")
    decodebuf = c_codecs.pypy_cjk_dec_new(c)
    try:
        u = c_codecs.decodeex(space, decodebuf, "~{abcd~}")
        assert u == u'\u5f95\u6c85'.encode('utf8')
        u = c_codecs.decodeex(space, decodebuf, "~{efgh~}")
        assert u == u'\u5f50\u73b7'.encode('utf8')
        # ASCII mixed with two GB runs; the second run has no closing "~}".
        u = c_codecs.decodeex(space, decodebuf, "!~{abcd~}xyz~{efgh")
        assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'.encode('utf8')
    finally:
        c_codecs.pypy_cjk_dec_free(decodebuf)

def test_decodeex_hz_incomplete(space):
    """Feed an HZ byte stream to ``decodeex`` one byte at a time.

    After every call, the number of bytes reported by
    ``pypy_cjk_dec_inbuf_consumed`` is dropped from the pending buffer; the
    remainder is resubmitted together with the next byte.  Once the whole
    stream has been fed, nothing may be left pending.

    The decoder buffer is freed in a ``finally`` clause so that a failing
    assertion does not leak it.
    """
    codec = getcodec("hz")
    decodebuf = c_codecs.pypy_cjk_dec_new(codec)
    # (byte fed, text expected from the call that includes that byte).
    # Explicit pairs keep input and expectation aligned; two parallel
    # sequences combined with zip() would silently truncate on a mismatch.
    steps = [
        ('!', u'!'),
        ('~', u''),
        ('{', u''),
        ('a', u''),
        ('b', u'\u5f95'),
        ('c', u''),
        ('d', u'\u6c85'),
        ('~', u''),
        ('}', u''),
        ('x', u'x'),
        ('y', u'y'),
        ('z', u'z'),
        ('~', u''),
        ('{', u''),
        ('e', u''),
        ('f', u'\u5f50'),
        ('g', u''),
        ('h', u'\u73b7'),
    ]
    try:
        buf = ''
        for byte, output in steps:
            buf += byte
            # NOTE(review): MBERR_TOOFEW presumably flags a truncated
            # multibyte sequence; ignoring it lets partial input through
            # without an error -- confirm against c_codecs.
            u = c_codecs.decodeex(space, decodebuf, buf,
                                  ignore_error=c_codecs.MBERR_TOOFEW)
            assert u == output.encode('utf8')
            # Keep only the bytes the decoder has not consumed yet.
            incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf)
            buf = buf[incompletepos:]
        assert buf == ''
    finally:
        c_codecs.pypy_cjk_dec_free(decodebuf)

def test_decode_hz_error(space):
    """Malformed HZ input raises EncodeDecodeError with position and reason."""
    hz = getcodec("hz")
    # (input, expected reason); both failures are reported at bytes [2, 3).
    cases = [
        ("~{}", "incomplete multibyte sequence"),
        ("~{xyz}", "illegal multibyte sequence"),
    ]
    for data, expected_reason in cases:
        with pytest.raises(EncodeDecodeError) as excinfo:
            decode(space, hz, data)
        err = excinfo.value
        assert err.start == 2
        assert err.end == 3
        assert err.reason == expected_reason

def test_decode_hz_ignore(space):
    """With the 'ignore' error mode, undecodable HZ bytes are dropped."""
    hz = getcodec("hz")
    decoded = decode(space, hz, 'def~{}abc', 'ignore').decode('utf8')
    assert decoded == u'def\u5f95'

def test_decode_hz_replace(space):
    """With the 'replace' error mode, undecodable HZ bytes become U+FFFD."""
    hz = getcodec("hz")
    decoded = decode(space, hz, 'def~{}abc', 'replace').decode('utf8')
    assert decoded == u'def\ufffd\u5f95\ufffd'

def test_encode_hz(space):
    """Encode ASCII, GB characters and a literal tilde with the HZ codec."""
    hz = getcodec("hz")
    # NOTE(review): the fourth argument looks like the input length in
    # characters -- confirm against c_codecs.encode.
    ascii_out = encode(space, hz, u'foobar'.encode('utf8'), 6)
    assert ascii_out == 'foobar'
    assert type(ascii_out) is str

    gb_out = encode(space, hz, u'\u5f95\u6cef'.encode('utf8'), 2)
    assert gb_out == '~{abc}~}'

    # bpo-30003
    tilde_out = encode(space, hz, 'ab~cd', 5)
    assert tilde_out == 'ab~~cd'

def test_encode_hz_error(space):
    """Encoding U+1234 with HZ raises EncodeDecodeError at index 3."""
    hz = getcodec("hz")
    text = u'abc\u1234def'.encode('utf8')
    with pytest.raises(EncodeDecodeError) as excinfo:
        encode(space, hz, text, 7)
    err = excinfo.value
    assert err.start == 3
    assert err.end == 4
    assert err.reason == "illegal multibyte sequence"

def test_encode_hz_ignore(space):
    """With the 'ignore' error mode, the unencodable character is dropped."""
    hz = getcodec("hz")
    text = u'abc\u1234def'.encode('utf8')
    assert encode(space, hz, text, 7, 'ignore') == 'abcdef'

def test_encode_hz_replace(space):
    """With the 'replace' error mode, the unencodable character becomes '?'."""
    hz = getcodec("hz")
    text = u'abc\u1234def'.encode('utf8')
    assert encode(space, hz, text, 7, 'replace') == 'abc?def'

def test_encode_jisx0208(space):
    """Encode four CJK characters with iso2022_jp, escape sequences included."""
    iso2022_jp = getcodec('iso2022_jp')
    text = u'\u83ca\u5730\u6642\u592b'.encode('utf8')
    encoded = encode(space, iso2022_jp, text, 4)
    assert encoded == '\x1b$B5FCO;~IW\x1b(B'
    assert type(encoded) is str

def test_encode_custom_error_handler_bytes(space):
    """Encode with a caller-supplied error handler (currently skipped)."""
    pytest.skip("needs revamping in py3k")
    # Everything below is unreachable until the skip above is removed.
    hz = getcodec("hz")

    def errorhandler(errors, enc, msg, w_t, startingpos, endingpos):
        # Hands back UTF-8 encoded U+00C3 together with endingpos
        # (presumably the position at which encoding resumes -- confirm).
        return u'\xc3'.encode('utf8'), endingpos

    text = u'abc\u1234def'.encode('utf8')
    encoded = encode(space, hz, text, 7, 'foo', errorhandler)
    assert '\xc3' in encoded