# -*- coding: utf-8 -*-
"""Misc unicode tests
Made for Jython.
"""
import os
import re
import sys
import unittest
from StringIO import StringIO
from test import test_support
class UnicodeTestCase(unittest.TestCase):
def test_simplejson_plane_bug(self):
# a bug exposed by simplejson: unicode __add__ was always
# forcing the basic plane
chunker = re.compile(r'(.*?)(["\\\x00-\x1f])', re.VERBOSE | re.MULTILINE | re.DOTALL)
orig = u'z\U0001d120x'
quoted1 = u'"z\U0001d120x"'
quoted2 = '"' + orig + '"'
# chunker re gives different results depending on the plane
self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))
self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))
def test_parse_unicode(self):
foo = u'ą\n'
self.assertEqual(len(foo), 2, repr(foo))
self.assertEqual(repr(foo), "u'\\u0105\\n'")
self.assertEqual(ord(foo[0]), 261)
self.assertEqual(ord(foo[1]), 10)
bar = foo.encode('utf-8')
self.assertEqual(len(bar), 3)
self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")
self.assertEqual(ord(bar[0]), 196)
self.assertEqual(ord(bar[1]), 133)
self.assertEqual(ord(bar[2]), 10)
def test_parse_raw_unicode(self):
foo = ur'ą\n'
self.assertEqual(len(foo), 3, repr(foo))
self.assertEqual(repr(foo), "u'\\u0105\\\\n'")
self.assertEqual(ord(foo[0]), 261)
self.assertEqual(ord(foo[1]), 92)
self.assertEqual(ord(foo[2]), 110)
bar = foo.encode('utf-8')
self.assertEqual(len(bar), 4)
self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")
self.assertEqual(ord(bar[0]), 196)
self.assertEqual(ord(bar[1]), 133)
self.assertEqual(ord(bar[2]), 92)
self.assertEqual(ord(bar[3]), 110)
for baz in ur'Hello\u0020World !', ur'Hello\U00000020World !':
self.assertEqual(len(baz), 13, repr(baz))
self.assertEqual(repr(baz), "u'Hello World !'")
self.assertEqual(ord(baz[5]), 32)
quux = ur'\U00100000'
self.assertEqual(repr(quux), "u'\\U00100000'")
if sys.maxunicode == 0xffff:
self.assertEqual(len(quux), 2)
self.assertEqual(ord(quux[0]), 56256)
self.assertEqual(ord(quux[1]), 56320)
else:
self.assertEqual(len(quux), 1)
self.assertEqual(ord(quux), 1048576)
def test_raw_unicode_escape(self):
foo = u'\U00100000'
self.assertEqual(foo.encode('raw_unicode_escape'), '\\U00100000')
self.assertEqual(foo.encode('raw_unicode_escape').decode('raw_unicode_escape'),
foo)
for bar in '\\u', '\\u000', '\\U00000':
self.assertRaises(UnicodeDecodeError, bar.decode, 'raw_unicode_escape')
def test_encode_decimal(self):
self.assertEqual(int(u'\u0039\u0032'), 92)
self.assertEqual(int(u'\u0660'), 0)
self.assertEqual(int(u' \u001F\u0966\u096F\u0039'), 99)
self.assertEqual(long(u'\u0663'), 3)
self.assertEqual(float(u'\u0663.\u0661'), 3.1)
self.assertEqual(complex(u'\u0663.\u0661'), 3.1+0j)
def test_unstateful_end_of_data(self):
# http://bugs.jython.org/issue1368
for encoding in 'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le':
self.assertRaises(UnicodeDecodeError, '\xe4'.decode, encoding)
def test_formatchar(self):
self.assertEqual('%c' % 255, '\xff')
self.assertRaises(OverflowError, '%c'.__mod__, 256)
result = u'%c' % 256
self.assert_(isinstance(result, unicode))
self.assertEqual(result, u'\u0100')
if sys.maxunicode == 0xffff:
self.assertEqual(u'%c' % sys.maxunicode, u'\uffff')
else:
self.assertEqual(u'%c' % sys.maxunicode, u'\U0010ffff')
self.assertRaises(OverflowError, '%c'.__mod__, sys.maxunicode + 1)
def test_repr(self):
self.assert_(isinstance('%r' % u'foo', str))
def test_concat(self):
self.assertRaises(UnicodeDecodeError, lambda : u'' + '毛泽东')
self.assertRaises(UnicodeDecodeError, lambda : '毛泽东' + u'')
def test_join(self):
self.assertRaises(UnicodeDecodeError, u''.join, ['foo', '毛泽东'])
self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])
def test_file_encoding(self):
'''Ensure file writing doesn't attempt to encode things by default and reading doesn't
decode things by default. This was jython's behavior prior to 2.2.1'''
EURO_SIGN = u"\u20ac"
try:
EURO_SIGN.encode()
except UnicodeEncodeError:
# This default encoding can't handle the encoding the Euro sign. Skip the test
return
f = open(test_support.TESTFN, "w")
self.assertRaises(UnicodeEncodeError, f, write, EURO_SIGN,
"Shouldn't be able to write out a Euro sign without first encoding")
f.close()
f = open(test_support.TESTFN, "w")
f.write(EURO_SIGN.encode('utf-8'))
f.close()
f = open(test_support.TESTFN, "r")
encoded_euro = f.read()
f.close()
os.remove(test_support.TESTFN)
self.assertEquals('\xe2\x82\xac', encoded_euro)
self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))
def test_translate(self):
# http://bugs.jython.org/issue1483
self.assertEqual(
u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c'.translate({}),
u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c')
self.assertEqual(u'\u0443oo'.translate({0x443: 102}), u'foo')
self.assertEqual(
unichr(sys.maxunicode).translate({sys.maxunicode: 102}),
u'f')
class UnicodeFormatTestCase(unittest.TestCase):
def test_unicode_mapping(self):
assertTrue = self.assertTrue
class EnsureUnicode(dict):
def __missing__(self, key):
assertTrue(isinstance(key, unicode))
return key
u'%(foo)s' % EnsureUnicode()
def test_non_ascii_unicode_mod_str(self):
# Regression test for a problem on the formatting logic: when no unicode
# args were found, Jython stored the resulting buffer on a PyString,
# decoding it later to make a PyUnicode. That crashed when the left side
# of % was a unicode containing non-ascii chars
self.assertEquals(u"\u00e7%s" % "foo", u"\u00e7foo")
class UnicodeStdIOTestCase(unittest.TestCase):
def setUp(self):
self.stdout = sys.stdout
def tearDown(self):
sys.stdout = self.stdout
def test_intercepted_stdout(self):
msg = u'Circle is 360\u00B0'
sys.stdout = StringIO()
print msg,
self.assertEqual(sys.stdout.getvalue(), msg)
def test_main():
    """Run every test case class defined in this module."""
    cases = (UnicodeTestCase, UnicodeFormatTestCase, UnicodeStdIOTestCase)
    test_support.run_unittest(*cases)
# Run the whole suite when this module is executed as a script.
if __name__ == '__main__':
    test_main()
|