File: test_encoding.py

package info (click to toggle)
wader 0.5.12-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,672 kB
  • ctags: 5,053
  • sloc: python: 17,191; makefile: 142; sh: 131
file content (161 lines) | stat: -rw-r--r-- 7,859 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2008  Vodafone España, S.A.
# Copyright (C) 2008-2009  Warp Networks, S.L.
# Author:  Pablo Martí
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Unittests for the encoding module"""

from twisted.trial import unittest

from wader.common.encoding import (CONTROL_0, CONTROL_1, LATIN_EX_A,
                                   LATIN_EX_B, check_if_ucs2,
                                   pack_ucs2_bytes, unpack_ucs2_bytes,
                                   unpack_ucs2_bytes_in_ts31101_80,
                                   unpack_ucs2_bytes_in_ts31101_81,
                                   unpack_ucs2_bytes_in_ts31101_82)

CTL_0 = '007F'
CTL_1 = '00FF'
LTN_A = '017F'
LTN_B = '024F'


class TestEncoding(unittest.TestCase):
    """Tests for encoding"""

    def test_check_if_ucs2(self):
        self.assertEqual(check_if_ucs2(CTL_0), True)
        self.assertEqual(check_if_ucs2(CTL_1), True)
        self.assertEqual(check_if_ucs2(LTN_A), True)
        self.assertEqual(check_if_ucs2(LTN_B), True)
        self.assertEqual(check_if_ucs2('6C34'), True)
        self.assertEqual(
            check_if_ucs2('0056006F006400610066006F006E0065'), True)
        self.assertEqual(check_if_ucs2('003'), False)

        # XXX: This should be invalid but our code fails at the moment
        # XXX: Disable as people expect tests to run cleanly unless something
        #      just broke, and there's no way to express 'known failure'
        #self.assertEqual(check_if_ucs2('D834DD1E'), False)

    def test_check_if_ucs2_limit_control_0(self):
        self.assertEqual(check_if_ucs2(CTL_0, limit=CONTROL_0), True)
        self.assertEqual(check_if_ucs2(CTL_1, limit=CONTROL_0), False)
        self.assertEqual(check_if_ucs2(LTN_A, limit=CONTROL_0), False)
        self.assertEqual(check_if_ucs2(LTN_B, limit=CONTROL_0), False)
        self.assertEqual(check_if_ucs2('6C34', limit=CONTROL_0), False)
        self.assertEqual(
            check_if_ucs2(CTL_0 + CTL_0 + CTL_0, limit=CONTROL_0), True)
        self.assertEqual(
            check_if_ucs2('6C34' + CTL_0 + CTL_0, limit=CONTROL_0), False)
        self.assertEqual(
            check_if_ucs2(CTL_0 + '6C34' + CTL_0, limit=CONTROL_0), False)
        self.assertEqual(
            check_if_ucs2(CTL_0 + CTL_0 + '6C34', limit=CONTROL_0), False)

    def test_check_if_ucs2_limit_control_1(self):
        self.assertEqual(check_if_ucs2(CTL_0, limit=CONTROL_1), True)
        self.assertEqual(check_if_ucs2(CTL_1, limit=CONTROL_1), True)
        self.assertEqual(check_if_ucs2(LTN_A, limit=CONTROL_1), False)
        self.assertEqual(check_if_ucs2(LTN_B, limit=CONTROL_1), False)
        self.assertEqual(check_if_ucs2('6C34', limit=CONTROL_1), False)
        self.assertEqual(
            check_if_ucs2(CTL_1 + CTL_1 + CTL_1, limit=CONTROL_1), True)
        self.assertEqual(
            check_if_ucs2('6C34' + CTL_1 + CTL_1, limit=CONTROL_1), False)
        self.assertEqual(
            check_if_ucs2(CTL_1 + '6C34' + CTL_1, limit=CONTROL_1), False)
        self.assertEqual(
            check_if_ucs2(CTL_1 + CTL_1 + '6C34', limit=CONTROL_1), False)

    def test_check_if_ucs2_limit_extended_latin_a(self):
        self.assertEqual(check_if_ucs2(CTL_0, limit=LATIN_EX_A), True)
        self.assertEqual(check_if_ucs2(CTL_1, limit=LATIN_EX_A), True)
        self.assertEqual(check_if_ucs2(LTN_A, limit=LATIN_EX_A), True)
        self.assertEqual(check_if_ucs2(LTN_B, limit=LATIN_EX_A), False)
        self.assertEqual(check_if_ucs2('6C34', limit=LATIN_EX_A), False)
        self.assertEqual(
            check_if_ucs2(LTN_A + LTN_A + LTN_A, limit=LATIN_EX_A), True)
        self.assertEqual(
            check_if_ucs2('6C34' + LTN_A + LTN_A, limit=LATIN_EX_A), False)
        self.assertEqual(
            check_if_ucs2(LTN_A + '6C34' + LTN_A, limit=LATIN_EX_A), False)
        self.assertEqual(
            check_if_ucs2(LTN_A + LTN_A + '6C34', limit=LATIN_EX_A), False)

    def test_check_if_ucs2_limit_extended_latin_b(self):
        self.assertEqual(check_if_ucs2(CTL_0, limit=LATIN_EX_B), True)
        self.assertEqual(check_if_ucs2(CTL_1, limit=LATIN_EX_B), True)
        self.assertEqual(check_if_ucs2(LTN_A, limit=LATIN_EX_B), True)
        self.assertEqual(check_if_ucs2(LTN_B, limit=LATIN_EX_B), True)
        self.assertEqual(check_if_ucs2('6C34', limit=LATIN_EX_B), False)
        self.assertEqual(
            check_if_ucs2(LTN_B + LTN_B + LTN_B, limit=LATIN_EX_B), True)
        self.assertEqual(
            check_if_ucs2('6C34' + LTN_B + LTN_B, limit=LATIN_EX_B), False)
        self.assertEqual(
            check_if_ucs2(LTN_B + '6C34' + LTN_B, limit=LATIN_EX_B), False)
        self.assertEqual(
            check_if_ucs2(LTN_B + LTN_B + '6C34', limit=LATIN_EX_B), False)

    def test_pack_ucs2_bytes(self):
        # 07911356131313F311000A9260214365870008AA080068006F006C0061
        self.assertEqual(pack_ucs2_bytes('hola'), '0068006F006C0061')
        # 07911356131313F311000A9260214365870008AA0A0068006F006C00610073
        self.assertEqual(pack_ucs2_bytes('holas'), '0068006F006C00610073')

        self.assertEqual(pack_ucs2_bytes(u"中华人民共和国"),
                         '4E2D534E4EBA6C115171548C56FD')

    def test_unpack_ucs2_bytes(self):
        self.assertEqual(unpack_ucs2_bytes('0068006F006C0061'), 'hola')
        resp = 'holas'
        self.assertEqual(unpack_ucs2_bytes('0068006F006C00610073'), resp)

    def test_unpack_ucs2_bytes_in_ts31101_80(self):
        # From Huawei example
        self.assertEqual(
            unpack_ucs2_bytes_in_ts31101_80('534E4E3A'), u'华为')

    def test_unpack_ucs2_bytes_in_ts31101_81(self):
        # From our original Huawei contacts code
        self.assertEqual(
            unpack_ucs2_bytes_in_ts31101_81('0602A46563746F72FF'), u'Ĥector')

        # From Android code
        self.assertEqual(
            unpack_ucs2_bytes_in_ts31101_81('0A01566FEC6365204DE0696CFFFFFF'),
                                            u'Vo\u00ECce M\u00E0il')
        # From TS102221
        # Byte 4 indicates GSM Default Alphabet character '53', i.e. 'S'.
        # Byte 5 indicates a UCS2 character offset to the base pointer of '15',
        #           expressed in binary as follows 001 0101, which, when added
        #           to the base pointer value results in a sixteen bit value of
        #           0000 1001 1001 0101, i.e. '0995', which is the Bengali
        #           letter KA.
        # Byte 6 / 7 were not defined in TS102221 example, so just repeated 5
        # Byte 8 contains the value 'FF', but as the string length is 5, this
        #           is a valid character in the string, where the bit pattern
        #           111 1111 is added to the base pointer, yielding a sixteen
        #           bit value of 0000 1001 1111 1111 for the UCS2 character
        #           (i.e. '09FF').
        self.assertEqual(
            unpack_ucs2_bytes_in_ts31101_81('051353959595FFFF'), u'Sককক\u09FF')

    def test_unpack_ucs2_bytes_in_ts31101_82(self):
        # From TS102221
        self.assertEqual(
            unpack_ucs2_bytes_in_ts31101_82('0505302D82D32D31'), u'-Բփ-1')