File: test_surrogateescape.py

package info (click to toggle)
python-future 0.18.2-6
links: PTS, VCS
area: main
in suites: bookworm
size: 4,264 kB
sloc: python: 43,246; makefile: 136; sh: 29
file content (142 lines) | stat: -rw-r--r-- 5,985 bytes
parent folder | download | duplicates (2)
# -*- coding: utf-8 -*-
"""
Tests for the surrogateescape codec
"""

from __future__ import absolute_import, division, unicode_literals
from future.builtins import (bytes, dict, int, range, round, str, super,
                             ascii, chr, hex, input, next, oct, open, pow,
                             filter, map, zip)
from future.utils.surrogateescape import register_surrogateescape
from future.tests.base import unittest, expectedFailurePY26, expectedFailurePY2


class TestSurrogateEscape(unittest.TestCase):
    def setUp(self):
        register_surrogateescape()

    @expectedFailurePY26    # Python 2.6 str.decode() takes no keyword args
    def test_surrogateescape(self):
        """
        From the backport of the email package
        """
        s = b'From: foo@bar.com\nTo: baz\nMime-Version: 1.0\nContent-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: base64\n\ncMO2c3RhbA\xc3\xa1=\n'
        u = 'From: foo@bar.com\nTo: baz\nMime-Version: 1.0\nContent-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: base64\n\ncMO2c3RhbA\udcc3\udca1=\n'
        s2 = s.decode('ASCII', errors='surrogateescape')
        self.assertEqual(s2, u)

    def test_encode_ascii_surrogateescape(self):
        """
        This crops up in the email module. It would be nice if it worked ...
        """
        payload = str(u'cMO2c3RhbA\udcc3\udca1=\n')
        b = payload.encode('ascii', 'surrogateescape')
        self.assertEqual(b, b'cMO2c3RhbA\xc3\xa1=\n')

    def test_encode_ascii_unicode(self):
        """
        Verify that exceptions are raised properly.
        """
        self.assertRaises(UnicodeEncodeError, u'\N{SNOWMAN}'.encode, 'US-ASCII', 'surrogateescape')

    @expectedFailurePY2
    def test_encode_ascii_surrogateescape_non_newstr(self):
        """
        As above but without a newstr object. Fails on Py2.
        """
        payload = u'cMO2c3RhbA\udcc3\udca1=\n'
        b = payload.encode('ascii', 'surrogateescape')
        self.assertEqual(b, b'cMO2c3RhbA\xc3\xa1=\n')


class SurrogateEscapeTest(unittest.TestCase):
    """
    These tests are from Python 3.3's test suite
    """
    def setUp(self):
        register_surrogateescape()

    def test_utf8(self):
        # Bad byte
        self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
                         "foo\udc80bar")
        self.assertEqual(str("foo\udc80bar").encode("utf-8", "surrogateescape"),
                         b"foo\x80bar")
        # bad-utf-8 encoded surrogate
        # self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
        #                  "\udced\udcb0\udc80")
        self.assertEqual(str("\udced\udcb0\udc80").encode("utf-8", "surrogateescape"),
                         b"\xed\xb0\x80")

    def test_ascii(self):
        # bad byte
        self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
                         "foo\udc80bar")
        # Fails:
        # self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
        #                  b"foo\x80bar")

    @expectedFailurePY2
    def test_charmap(self):
        # bad byte: \xa5 is unmapped in iso-8859-3
        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
                         "foo\udca5bar")
        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
                         b"foo\xa5bar")

    def test_latin1(self):
        # Issue6373
        self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
                         b"\xe4\xeb\xef\xf6\xfc")

    # FIXME:
    @expectedFailurePY2
    def test_encoding_works_normally(self):
        """
        Test that encoding into various encodings (particularly utf-16)
        still works with the surrogateescape error handler in action ...
        """
        TEST_UNICODE_STR = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮'
        # Tk icon as a .gif:
        TEST_BYTE_STR = b'GIF89a\x0e\x00\x0b\x00\x80\xff\x00\xff\x00\x00\xc0\xc0\xc0!\xf9\x04\x01\x00\x00\x01\x00,\x00\x00\x00\x00\x0e\x00\x0b\x00@\x02\x1f\x0c\x8e\x10\xbb\xcan\x90\x99\xaf&\xd8\x1a\xce\x9ar\x06F\xd7\xf1\x90\xa1c\x9e\xe8\x84\x99\x89\x97\xa2J\x01\x00;\x1a\x14\x00;;\xba\nD\x14\x00\x00;;'
        # s1 = 'quéstionable'
        s1 = TEST_UNICODE_STR
        b1 = s1.encode('utf-8')
        b2 = s1.encode('utf-16')
        # b3 = s1.encode('latin-1')
        self.assertEqual(b1, str(s1).encode('utf-8', 'surrogateescape'))
        self.assertEqual(b2, str(s1).encode('utf-16', 'surrogateescape'))
        # self.assertEqual(b3, str(s1).encode('latin-1', 'surrogateescape'))

        s2 = 'きたないのよりきれいな方がいい'
        b4 = s2.encode('utf-8')
        b5 = s2.encode('utf-16')
        b6 = s2.encode('shift-jis')
        self.assertEqual(b4, str(s2).encode('utf-8', 'surrogateescape'))
        self.assertEqual(b5, str(s2).encode('utf-16', 'surrogateescape'))
        self.assertEqual(b6, str(s2).encode('shift-jis', 'surrogateescape'))

    def test_decoding_works_normally(self):
        """
        Test that decoding into various encodings (particularly utf-16)
        still works with the surrogateescape error handler in action ...
        """
        s1 = 'quéstionable'
        b1 = s1.encode('utf-8')
        b2 = s1.encode('utf-16')
        b3 = s1.encode('latin-1')
        self.assertEqual(s1, b1.decode('utf-8', 'surrogateescape'))
        self.assertEqual(s1, b2.decode('utf-16', 'surrogateescape'))
        self.assertEqual(s1, b3.decode('latin-1', 'surrogateescape'))

        s2 = '文'
        b4 = s2.encode('utf-8')
        b5 = s2.encode('utf-16')
        b6 = s2.encode('shift-jis')
        self.assertEqual(s2, b4.decode('utf-8', 'surrogateescape'))
        self.assertEqual(s2, b5.decode('utf-16', 'surrogateescape'))
        self.assertEqual(s2, b6.decode('shift-jis', 'surrogateescape'))


if __name__ == '__main__':
    unittest.main()