File: testUnicodeNormalizer.py

package info (click to toggle)
zope-cmfplone 2.5.1-4etch3
  • links: PTS
  • area: main
  • in suites: etch
  • size: 7,752 kB
  • ctags: 5,237
  • sloc: python: 28,264; xml: 3,723; php: 129; makefile: 99; sh: 2
file content (69 lines) | stat: -rw-r--r-- 2,515 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#
# Tests the normalizeUnicode function
#

import os, sys
# When this file is run directly as a script, execute framework.py (looked up
# in sys.path[0]) in this module's namespace before the Products imports.
# NOTE(review): presumably framework.py prepares the Zope test environment and
# defines the framework() callable used at the bottom of this file -- confirm.
if __name__ == '__main__':
    execfile(os.path.join(sys.path[0], 'framework.py'))

from Products.CMFPlone.tests import PloneTestCase

from Products.CMFPlone.UnicodeNormalizer import normalizeUnicode


class TestNormalizer(PloneTestCase.PloneTestCase):
    """Checks the plain-string output of normalizeUnicode.

    Every test passes a unicode literal to normalizeUnicode and compares the
    result with the expected ASCII text.
    """

    def testNormalize(self):
        # Norwegian sample: accented European characters are expected to be
        # replaced by rough ASCII equivalents.
        norwegian = u"Eksempel \xe6\xf8\xe5 norsk \xc6\xd8\xc5"
        expected = 'Eksempel eoa norsk EOA'
        self.assertEqual(normalizeUnicode(norwegian), expected)

    def testNormalizeSingleChars(self):
        # One-character inputs: an accented letter, a plain ASCII letter, and
        # a CJK character whose expected result is the hex digits of its
        # code point.
        cases = (
            (u"\xe6", 'e'),
            (u"a", 'a'),
            (u"\u9ad8", '9ad8'),
        )
        for char, expected in cases:
            self.assertEqual(normalizeUnicode(char), expected)

    def testNormalizeGerman(self):
        # German mapping: a leading A-umlaut is expected to become a plain 'A'.
        german = u"\xc4ffin"
        expected = 'Affin'
        self.assertEqual(normalizeUnicode(german), expected)

    def testNormalizeWithNumbers(self):
        # Digits and hyphens mixed into the text are expected to be kept
        # as they are.
        mixed = u"Eksempel-1-2-3-\xe6\xf8\xe5 norsk \xc6\xd8\xc5"
        expected = 'Eksempel-1-2-3-eoa norsk EOA'
        self.assertEqual(normalizeUnicode(mixed), expected)

    def testNormalizeGreek(self):
        # Greek input (the original author notes these letters are not
        # supported by UnicodeData) is expected to come out in Latin letters.
        greek = u'\u039d\u03af\u03ba\u03bf\u03c2 \u03a4\u03b6\u03ac\u03bd\u03bf\u03c2'
        expected = 'Nikos Tzanos'
        self.assertEqual(normalizeUnicode(greek), expected)

    def testNormalizeRussian(self):
        # Cyrillic input (the original author notes these letters are not
        # supported by UnicodeData): two capitalised words and one all-caps
        # phrase, checked in order.
        cases = (
            (u'\u041f\u043e\u043b\u0438\u0442\u0438\u043a\u0430',
             'Politika'),
            (u'\u042d\u043a\u043e\u043d\u043e\u043c\u0438\u043a\u0430',
             'Ekonomika'),
            (u'\u041f\u041e\u0421\u041b\u0415\u0414\u041d\u0418\u0415 \u041d\u041e\u0412\u041e\u0421\u0422\u0418',
             'POSLEDNIE NOVOSTI'),
        )
        for russian, expected in cases:
            self.assertEqual(normalizeUnicode(russian), expected)

    def testNormalizeTurkish(self):
        # Turkish mapping: U-umlaut, g-breve and u-umlaut are expected to
        # lose their diacritics.
        turkish = u"\xdc\u011f\xfcr"
        expected = 'Ugur'
        self.assertEqual(normalizeUnicode(turkish), expected)

    def testNormalizeNonUnicode(self):
        # A plain (non-unicode) string argument must be rejected with
        # a TypeError.
        plain = 'foo'
        self.assertRaises(TypeError, normalizeUnicode, plain)


def test_suite():
    """Return a TestSuite holding the tests collected from TestNormalizer."""
    import unittest
    # Build the suite in one step; passing the tests to the constructor adds
    # them the same way a separate addTest() call would.
    return unittest.TestSuite([unittest.makeSuite(TestNormalizer)])

# Script entry point: hand control to framework() when run directly.
# NOTE(review): framework is not defined or imported by name in this file;
# presumably it is provided by the framework.py executed at the top -- confirm.
if __name__ == '__main__':
    framework()