#
# Tests the normalizeUnicode method
#
import os, sys
if __name__ == '__main__':
    # Run directly as a script rather than imported: execute the
    # framework.py found in sys.path[0] (the script's own directory when a
    # file is run directly) before the Plone test imports below.
    # NOTE(review): presumably this is what defines framework(), which is
    # called at the bottom of the file -- confirm against framework.py.
    execfile(
        os.path.join(sys.path[0], 'framework.py')
    )
from Products.CMFPlone.tests import PloneTestCase
from Products.CMFPlone.UnicodeNormalizer import normalizeUnicode
class TestNormalizer(PloneTestCase.PloneTestCase):
    """Checks the plain strings normalizeUnicode returns for unicode input.

    Every test passes a unicode literal to normalizeUnicode and compares the
    result with the expected ASCII text.
    """

    def testNormalize(self):
        # Accented European characters are replaced by rough ASCII
        # equivalents.
        source = u"Eksempel \xe6\xf8\xe5 norsk \xc6\xd8\xc5"
        expected = 'Eksempel eoa norsk EOA'
        self.assertEqual(normalizeUnicode(source), expected)

    def testNormalizeSingleChars(self):
        # One character per call: an accented letter, a plain ASCII letter,
        # and U+9AD8, which is expected back as the hex digits of its code
        # point.
        cases = (
            (u"\xe6", 'e'),
            (u"a", 'a'),
            (u"\u9ad8", '9ad8'),
        )
        for char, expected in cases:
            self.assertEqual(normalizeUnicode(char), expected)

    def testNormalizeGerman(self):
        # German normalization mapping
        source = u"\xc4ffin"
        expected = 'Affin'
        self.assertEqual(normalizeUnicode(source), expected)

    def testNormalizeWithNumbers(self):
        # Digits and hyphens mixed in with accented text
        source = u"Eksempel-1-2-3-\xe6\xf8\xe5 norsk \xc6\xd8\xc5"
        expected = 'Eksempel-1-2-3-eoa norsk EOA'
        self.assertEqual(normalizeUnicode(source), expected)

    def testNormalizeGreek(self):
        # Greek letters (not supported by UnicodeData)
        source = u'\u039d\u03af\u03ba\u03bf\u03c2 \u03a4\u03b6\u03ac\u03bd\u03bf\u03c2'
        expected = 'Nikos Tzanos'
        self.assertEqual(normalizeUnicode(source), expected)

    def testNormalizeRussian(self):
        # Russian letters (not supported by UnicodeData); the last sample is
        # all upper case and contains a space.
        cases = (
            (u'\u041f\u043e\u043b\u0438\u0442\u0438\u043a\u0430',
             'Politika'),
            (u'\u042d\u043a\u043e\u043d\u043e\u043c\u0438\u043a\u0430',
             'Ekonomika'),
            (u'\u041f\u041e\u0421\u041b\u0415\u0414\u041d\u0418\u0415 \u041d\u041e\u0412\u041e\u0421\u0422\u0418',
             'POSLEDNIE NOVOSTI'),
        )
        for source, expected in cases:
            self.assertEqual(normalizeUnicode(source), expected)

    def testNormalizeTurkish(self):
        # Turkish normalization mapping
        source = u"\xdc\u011f\xfcr"
        expected = 'Ugur'
        self.assertEqual(normalizeUnicode(source), expected)

    def testNormalizeNonUnicode(self):
        # A plain (non-unicode) string must be rejected with a TypeError.
        self.assertRaises(TypeError, normalizeUnicode, 'foo')
def test_suite():
    """Return a TestSuite containing the tests collected from TestNormalizer."""
    from unittest import TestSuite, makeSuite

    normalizer_tests = makeSuite(TestNormalizer)
    # Passing the collected tests to the constructor adds them to the new
    # suite, just like calling addTest() on an empty one.
    return TestSuite((normalizer_tests,))
if __name__ == '__main__':
    # Script mode only: kick off the test run through framework().
    # NOTE(review): framework is not defined in this file; presumably it is
    # supplied by the framework.py executed at the top of the module --
    # confirm against framework.py.
    framework()