File: test_locale.py

package info (click to toggle)
pypy3 7.3.19%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (88 lines) | stat: -rw-r--r-- 3,711 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# coding: utf-8
import pytest
from pypy.module._codecs import interp_codecs
from pypy.module._codecs.locale import (
    str_decode_locale_surrogateescape, utf8_encode_locale_surrogateescape,
    str_decode_locale_strict, utf8_encode_locale_strict)
from rpython.rlib import rlocale
from pypy.interpreter import unicodehelper

class TestLocaleCodec(object):

    def setup_class(cls):
        from rpython.rlib import rlocale
        cls.oldlocale = rlocale.setlocale(rlocale.LC_ALL, None)

    def teardown_class(cls):
        if hasattr(cls, 'oldlocale'):
            from rpython.rlib import rlocale
            rlocale.setlocale(rlocale.LC_ALL, cls.oldlocale)

    def getdecoder(self, encoding):
        return getattr(unicodehelper, "str_decode_%s" % encoding.replace("-", ""))

    def getencoder(self, encoding):
        return getattr(unicodehelper,
                       "utf8_encode_%s" % encoding.replace("-", "_"))

    def getstate(self):
        return self.space.fromcache(interp_codecs.CodecState)

    def setlocale(self, locale):
        from rpython.rlib import rlocale
        try:
            rlocale.setlocale(rlocale.LC_ALL, locale)
        except rlocale.LocaleError:
            pytest.skip("%s locale unsupported" % locale)

    def test_encode_locale(self):
        self.setlocale("en_US.UTF-8")
        for locale_encoder in (utf8_encode_locale_surrogateescape,
                               utf8_encode_locale_strict):
            for val in u'foo', u' 日本', u'\U0001320C':
                utf8 = val.encode('utf-8')
                encoded = locale_encoder(utf8, len(val))
                assert encoded.decode('utf8') == val

    def test_encode_locale_errorhandler(self):
        self.setlocale("en_US.UTF-8")
        locale_encoder = utf8_encode_locale_surrogateescape
        utf8_encoder = self.getencoder('utf-8')
        encode_error_handler = self.getstate().encode_error_handler
        for val in u'foo\udc80bar', u'\udcff\U0001320C':
            s = val.encode('utf8')
            w_s = self.space.newtext(s)
            expected = utf8_encoder(self.space, s, w_s, 'surrogateescape',
                                    encode_error_handler)
            utf8 = val.encode('utf-8')
            assert locale_encoder(utf8, len(val)) == expected

    def test_decode_locale(self):
        self.setlocale("en_US.UTF-8")
        utf8_decoder = self.getdecoder('utf-8')
        for locale_decoder in (str_decode_locale_surrogateescape,
                               str_decode_locale_strict):
            for val in 'foo', ' \xe6\x97\xa5\xe6\x9c\xac', '\xf0\x93\x88\x8c':
                w_s = self.space.newbytes(val)
                assert (locale_decoder(val) ==
                                utf8_decoder(self.space, val, w_s, 'strict',
                                             True, None)[:2])

    @pytest.mark.parametrize('locale_decoder',
                 (str_decode_locale_surrogateescape, str_decode_locale_strict))
    def test_decode_locale_latin1(self, locale_decoder):
        self.setlocale("fr_FR")
        uni = u"août"
        string = uni.encode('latin1')
        assert locale_decoder(string) == (uni.encode('utf8'), len(uni))

    def test_decode_locale_errorhandler(self):
        self.setlocale("en_US.UTF-8")
        locale_decoder = str_decode_locale_surrogateescape
        utf8_decoder = self.getdecoder('utf-8')
        decode_error_handler = self.getstate().decode_error_handler
        val = 'foo\xe3bar'
        w_val = self.space.newbytes(val)
        expected = utf8_decoder(self.space, val, w_val, 'surrogateescape', True,
                                decode_error_handler)
        assert locale_decoder(val) == expected[:2]