File: test_surrogateescape.py

package info (click to toggle)
python-future 0.18.2-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 4,264 kB
  • sloc: python: 43,246; makefile: 136; sh: 29
file content (142 lines) | stat: -rw-r--r-- 5,985 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# -*- coding: utf-8 -*-
"""
Tests for the surrogateescape codec
"""

from __future__ import absolute_import, division, unicode_literals
from future.builtins import (bytes, dict, int, range, round, str, super,
                             ascii, chr, hex, input, next, oct, open, pow,
                             filter, map, zip)
from future.utils.surrogateescape import register_surrogateescape
from future.tests.base import unittest, expectedFailurePY26, expectedFailurePY2


class TestSurrogateEscape(unittest.TestCase):
    """
    Checks of the ``surrogateescape`` error handler when converting between
    bytes and text.
    """

    def setUp(self):
        # Invoked by unittest before every test method in this class.
        register_surrogateescape()

    @expectedFailurePY26    # Python 2.6 str.decode() takes no keyword args
    def test_surrogateescape(self):
        """
        Decode a message containing non-ASCII bytes as ASCII (a case taken
        from the backport of the email package).
        """
        raw_message = b'From: foo@bar.com\nTo: baz\nMime-Version: 1.0\nContent-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: base64\n\ncMO2c3RhbA\xc3\xa1=\n'
        expected_text = 'From: foo@bar.com\nTo: baz\nMime-Version: 1.0\nContent-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: base64\n\ncMO2c3RhbA\udcc3\udca1=\n'
        self.assertEqual(
            raw_message.decode('ASCII', errors='surrogateescape'),
            expected_text)

    def test_encode_ascii_surrogateescape(self):
        """
        Encode a newstr holding escaped bytes back to ASCII.

        This case crops up in the email module.
        """
        escaped_text = str(u'cMO2c3RhbA\udcc3\udca1=\n')
        self.assertEqual(escaped_text.encode('ascii', 'surrogateescape'),
                         b'cMO2c3RhbA\xc3\xa1=\n')

    def test_encode_ascii_unicode(self):
        """
        A non-ASCII character that is not an escaped byte must still raise
        UnicodeEncodeError.
        """
        encode_snowman = u'\N{SNOWMAN}'.encode
        self.assertRaises(UnicodeEncodeError,
                          encode_snowman, 'US-ASCII', 'surrogateescape')

    @expectedFailurePY2
    def test_encode_ascii_surrogateescape_non_newstr(self):
        """
        Same check as test_encode_ascii_surrogateescape, but the payload is
        a plain unicode literal rather than a newstr object. Fails on Py2.
        """
        plain_text = u'cMO2c3RhbA\udcc3\udca1=\n'
        encoded = plain_text.encode('ascii', 'surrogateescape')
        expected_bytes = b'cMO2c3RhbA\xc3\xa1=\n'
        self.assertEqual(encoded, expected_bytes)


class SurrogateEscapeTest(unittest.TestCase):
    """
    These tests are from Python 3.3's test suite
    """
    def setUp(self):
        # Invoked by unittest before every test method in this class.
        register_surrogateescape()

    def test_utf8(self):
        """
        Escape and restore bytes that are not valid UTF-8.
        """
        # Bad byte
        self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
                         "foo\udc80bar")
        self.assertEqual(str("foo\udc80bar").encode("utf-8", "surrogateescape"),
                         b"foo\x80bar")
        # bad-utf-8 encoded surrogate
        # NOTE: the decoding half of this check is disabled; it is kept here
        # so it can be re-enabled later:
        # self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
        #                  "\udced\udcb0\udc80")
        self.assertEqual(str("\udced\udcb0\udc80").encode("utf-8", "surrogateescape"),
                         b"\xed\xb0\x80")

    def test_ascii(self):
        """
        Escape a non-ASCII byte while decoding as ASCII.
        """
        # bad byte
        self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
                         "foo\udc80bar")
        # NOTE: the reverse direction is disabled because it fails:
        # self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
        #                  b"foo\x80bar")

    @expectedFailurePY2
    def test_charmap(self):
        """
        Round-trip a byte that a charmap codec leaves unmapped.
        """
        # bad byte: \xa5 is unmapped in iso-8859-3
        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
                         "foo\udca5bar")
        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
                         b"foo\xa5bar")

    def test_latin1(self):
        """
        Encode escaped bytes to latin-1.
        """
        # Issue6373
        self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
                         b"\xe4\xeb\xef\xf6\xfc")

    # FIXME: this test is currently expected to fail on Py2 (see decorator).
    @expectedFailurePY2
    def test_encoding_works_normally(self):
        """
        Test that encoding into various encodings (particularly utf-16)
        still works with the surrogateescape error handler in action ...
        """
        s1 = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮'
        b1 = s1.encode('utf-8')
        b2 = s1.encode('utf-16')
        # latin-1 is not checked for this sample: the text contains
        # characters that latin-1 cannot represent.
        self.assertEqual(b1, str(s1).encode('utf-8', 'surrogateescape'))
        self.assertEqual(b2, str(s1).encode('utf-16', 'surrogateescape'))

        s2 = 'きたないのよりきれいな方がいい'
        b4 = s2.encode('utf-8')
        b5 = s2.encode('utf-16')
        b6 = s2.encode('shift-jis')
        self.assertEqual(b4, str(s2).encode('utf-8', 'surrogateescape'))
        self.assertEqual(b5, str(s2).encode('utf-16', 'surrogateescape'))
        self.assertEqual(b6, str(s2).encode('shift-jis', 'surrogateescape'))

    def test_decoding_works_normally(self):
        """
        Test that decoding from various encodings (particularly utf-16)
        still works with the surrogateescape error handler in action ...
        """
        s1 = 'quéstionable'
        b1 = s1.encode('utf-8')
        b2 = s1.encode('utf-16')
        b3 = s1.encode('latin-1')
        self.assertEqual(s1, b1.decode('utf-8', 'surrogateescape'))
        self.assertEqual(s1, b2.decode('utf-16', 'surrogateescape'))
        self.assertEqual(s1, b3.decode('latin-1', 'surrogateescape'))

        s2 = '文'
        b4 = s2.encode('utf-8')
        b5 = s2.encode('utf-16')
        b6 = s2.encode('shift-jis')
        self.assertEqual(s2, b4.decode('utf-8', 'surrogateescape'))
        self.assertEqual(s2, b5.decode('utf-16', 'surrogateescape'))
        self.assertEqual(s2, b6.decode('shift-jis', 'surrogateescape'))


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()