File: test_c_codecs.py

package info (click to toggle)
pypy3 7.0.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 111,848 kB
  • sloc: python: 1,291,746; ansic: 74,281; asm: 5,187; cpp: 3,017; sh: 2,533; makefile: 544; xml: 243; lisp: 45; csh: 21; awk: 4
file content (131 lines) | stat: -rw-r--r-- 3,814 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import py
from pypy.module._multibytecodec.c_codecs import getcodec, codecs
from pypy.module._multibytecodec.c_codecs import decode, encode
from pypy.module._multibytecodec.c_codecs import EncodeDecodeError
from pypy.module._multibytecodec import c_codecs


def test_codecs_existence():
    for name in codecs:
        c = getcodec(name)
        assert c
    py.test.raises(KeyError, getcodec, "foobar")

def test_decode_gbk():
    c = getcodec("gbk")
    u = decode(c, "\xA1\xAA")
    assert u == unichr(0x2014)
    u = decode(c, "foobar")
    assert u == u"foobar"

def test_decode_hz():
    # stateful
    c = getcodec("hz")
    u = decode(c, "~{abc}")
    assert u == u'\u5f95\u6cef'
    u = decode(c, "~{")
    assert u == u''

def test_decodeex_hz():
    c = getcodec("hz")
    decodebuf = c_codecs.pypy_cjk_dec_new(c)
    u = c_codecs.decodeex(decodebuf, "~{abcd~}")
    assert u == u'\u5f95\u6c85'
    u = c_codecs.decodeex(decodebuf, "~{efgh~}")
    assert u == u'\u5f50\u73b7'
    u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh")
    assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'
    c_codecs.pypy_cjk_dec_free(decodebuf)

def test_decodeex_hz_incomplete():
    c = getcodec("hz")
    decodebuf = c_codecs.pypy_cjk_dec_new(c)
    buf = ''
    for c, output in zip("!~{abcd~}xyz~{efgh",
          [u'!',  # !
           u'',   # ~
           u'',   # {
           u'',   # a
           u'\u5f95',   # b
           u'',   # c
           u'\u6c85',   # d
           u'',   # ~
           u'',   # }
           u'x',  # x
           u'y',  # y
           u'z',  # z
           u'',   # ~
           u'',   # {
           u'',   # e
           u'\u5f50',   # f
           u'',   # g
           u'\u73b7',   # h
           ]):
        buf += c
        u = c_codecs.decodeex(decodebuf, buf,
                              ignore_error = c_codecs.MBERR_TOOFEW)
        assert u == output
        incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf)
        buf = buf[incompletepos:]
    assert buf == ''
    c_codecs.pypy_cjk_dec_free(decodebuf)

def test_decode_hz_error():
    # error
    c = getcodec("hz")
    e = py.test.raises(EncodeDecodeError, decode, c, "~{}").value
    assert e.start == 2
    assert e.end == 3
    assert e.reason == "incomplete multibyte sequence"
    #
    e = py.test.raises(EncodeDecodeError, decode, c, "~{xyz}").value
    assert e.start == 2
    assert e.end == 3
    assert e.reason == "illegal multibyte sequence"

def test_decode_hz_ignore():
    c = getcodec("hz")
    u = decode(c, 'def~{}abc', 'ignore')
    assert u == u'def\u5f95'

def test_decode_hz_replace():
    c = getcodec("hz")
    u = decode(c, 'def~{}abc', 'replace')
    assert u == u'def\ufffd\u5f95\ufffd'

def test_encode_hz():
    c = getcodec("hz")
    s = encode(c, u'foobar')
    assert s == 'foobar' and type(s) is str
    s = encode(c, u'\u5f95\u6cef')
    assert s == '~{abc}~}'

def test_encode_hz_error():
    # error
    c = getcodec("hz")
    e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def').value
    assert e.start == 3
    assert e.end == 4
    assert e.reason == "illegal multibyte sequence"

def test_encode_hz_ignore():
    c = getcodec("hz")
    s = encode(c, u'abc\u1234def', 'ignore')
    assert s == 'abcdef'

def test_encode_hz_replace():
    c = getcodec("hz")
    s = encode(c, u'abc\u1234def', 'replace')
    assert s == 'abc?def'

def test_encode_jisx0208():
    c = getcodec('iso2022_jp')
    s = encode(c, u'\u83ca\u5730\u6642\u592b')
    assert s == '\x1b$B5FCO;~IW\x1b(B' and type(s) is str

def test_encode_custom_error_handler_bytes():
    c = getcodec("hz")
    def errorhandler(errors, enc, msg, t, startingpos, endingpos):
        return None, '\xc3', endingpos
    s = encode(c, u'abc\u1234def', 'foo', errorhandler)
    assert '\xc3' in s