File: Tests.hs

package info (click to toggle)
haskell-encoding 0.10.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,392 kB
  • sloc: haskell: 4,372; ansic: 11; makefile: 4
file content (285 lines) | stat: -rw-r--r-- 11,144 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
module Test.Tests where

import Test.Tester
import Data.Encoding
import Data.Encoding.UTF8
import Data.Encoding.UTF16
import Data.Encoding.UTF32
import Data.Encoding.ISO88592
import Data.Encoding.ISO88593
import Data.Encoding.ISO88594
import Data.Encoding.ISO88595
import Data.Encoding.ISO88596
import Data.Encoding.ISO88597
import Data.Encoding.ISO88598
import Data.Encoding.ISO88599
import Data.Encoding.ISO885910
import Data.Encoding.ISO885911
import Data.Encoding.ISO885913
import Data.Encoding.ISO885914
import Data.Encoding.JISX0208
import Data.Encoding.ISO2022JP
import Data.Encoding.GB18030
import Data.Encoding.BootString
--import Data.Encoding.BIG5
import Data.Encoding.CP437
import Test.HUnit
import Test.QuickCheck
import Data.Char (ord)

-- | Run the QuickCheck-driven checks for each encoding in turn, printing a
-- label line before each group so the output can be attributed.
--
-- Most encodings go through 'quickCheckEncoding'; UTF32, punycode, GB18030
-- and CP437 only run 'quickCheck' on 'encodingIdentity'.
-- NOTE(review): the "UTF8" group exercises only 'UTF8Strict' -- confirm that
-- leaving out the lenient 'UTF8' variant is intentional.
identityTests :: IO ()
identityTests = do
  labelled "for UTF8..." (quickCheckEncoding UTF8Strict)
  labelled "for UTF16..." (quickCheckEncoding UTF16BE)
  quickCheckEncoding UTF16LE
  labelled "for UTF32..." (quickCheck (encodingIdentity UTF32))
  labelled "for ISO88592..." (quickCheckEncoding ISO88592)
  labelled "for ISO88593..." (quickCheckEncoding ISO88593)
  labelled "for ISO88594..." (quickCheckEncoding ISO88594)
  labelled "for ISO88595..." (quickCheckEncoding ISO88595)
  labelled "for ISO88596..." (quickCheckEncoding ISO88596)
  labelled "for ISO88597..." (quickCheckEncoding ISO88597)
  labelled "for ISO88598..." (quickCheckEncoding ISO88598)
  labelled "for ISO88599..." (quickCheckEncoding ISO88599)
  labelled "for ISO885910..." (quickCheckEncoding ISO885910)
  labelled "for ISO885911..." (quickCheckEncoding ISO885911)
  labelled "for ISO885913..." (quickCheckEncoding ISO885913)
  labelled "for ISO885914..." (quickCheckEncoding ISO885914)
  labelled "for Punycode..." (quickCheck (encodingIdentity punycode))
  labelled "for GB18030" (quickCheck (encodingIdentity GB18030))
  labelled "for CP437" (quickCheck (encodingIdentity CP437))
  where
    -- Print the label, then run the given check and return its result.
    labelled :: String -> IO a -> IO a
    labelled label check = putStrLn label >> check

-- | HUnit cases for the UTF-8 encodings.
--
-- The list is built from two groups, in this order:
--
-- * 'casesFor', instantiated first for 'UTF8' and then for 'UTF8Strict';
-- * 'variantCases', which name a specific variant explicitly.
--
-- How an 'EncodingTest' or 'DecodingError' is actually checked is decided by
-- the 'test' instance for that type, which is not defined in this module.
utf8Tests :: Test
utf8Tests = TestList (map test (sharedCases ++ variantCases))
  where
    sharedCases = concatMap casesFor [UTF8, UTF8Strict]

    -- Cases expected to hold for whichever variant is passed in.
    casesFor enc =
      [ EncodingTest enc "\x0041\x2262\x0391\x002E"
          [0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E]
      , EncodingTest enc "\xD55C\xAD6D\xC5B4"
          [0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4]
      , EncodingTest enc "\x65E5\x672C\x8A9E"
          [0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E]
      , EncodingTest enc "\x233B4"
          [0xF0,0xA3,0x8E,0xB4]
      , EncodingTest enc ""
          []
        -- First possible sequence of a certain length
      , EncodingTest enc "\x0000" [0x00]
      , EncodingTest enc "\x0080" [0xC2,0x80]
      , EncodingTest enc "\x0800" [0xE0,0xA0,0x80]
      , EncodingTest enc "\x10000" [0xF0,0x90,0x80,0x80]
        -- Last possible sequence of a certain length
      , EncodingTest enc "\x007F" [0x7F]
      , EncodingTest enc "\x07FF" [0xDF,0xBF]
      , EncodingTest enc "\xFFFF" [0xEF,0xBF,0xBF]
        -- Other boundaries
      , EncodingTest enc "\xD7FF" [0xED,0x9F,0xBF]
      , EncodingTest enc "\xE000" [0xEE,0x80,0x80]
      , EncodingTest enc "\xFFFD" [0xEF,0xBF,0xBD]
        -- Illegal starting characters
      , DecodingError enc [0x65,0x55,0x85] (IllegalCharacter 0x85)
        -- Unexpected end
      , DecodingError enc [0x41,0xE2,0x89,0xA2,0xCE] UnexpectedEnd
      , DecodingError enc [0x41,0xE2,0x89] UnexpectedEnd
      , DecodingError enc [0x41,0xE2] UnexpectedEnd
      ]

    -- Cases tied to one specific variant.
    variantCases =
      [ DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE)
      , DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF)
        -- Overlong representations of '/'
      , overlong [0xC0,0xAF]
      , overlong [0xE0,0x80,0xAF]
      , overlong [0xF0,0x80,0x80,0xAF]
        -- Maximum overlong sequences
      , overlong [0xC1,0xBF]
      , overlong [0xE0,0x9F,0xBF]
      , overlong [0xF0,0x8F,0xBF,0xBF]
        -- Overlong representations of '\NUL'
      , overlong [0xC0,0x80]
      , overlong [0xE0,0x80,0x80]
      , overlong [0xF0,0x80,0x80,0x80]
        -- Invalid extends
        -- 2 of 2
      , badByte [0xCC,0x1C,0xE0] 0x1C
        -- 2 of 3
      , badByte [0xE3,0x6C,0xB3] 0x6C
        -- 3 of 3
      , badByte [0xE3,0xB4,0x6D] 0x6D
        -- 2 of 4
      , badByte [0xF2,0x6C,0xB3,0xB3] 0x6C
        -- 3 of 4
      , badByte [0xF2,0xB3,0x6C,0xB3] 0x6C
        -- 4 of 4
      , badByte [0xF2,0xB3,0xB3,0x6C] 0x6C
      ]

    -- 'UTF8Strict' case whose expected error carries the whole input.
    overlong bytes =
      DecodingError UTF8Strict bytes (IllegalRepresentation bytes)

    -- 'UTF8Strict' case whose expected error names a single byte.
    badByte bytes culprit =
      DecodingError UTF8Strict bytes (IllegalCharacter culprit)

-- | HUnit cases for the UTF-16 encodings: string/byte pairs first, followed
-- by byte sequences paired with the 'IllegalCharacter' error expected for
-- them.
utf16Tests :: Test
utf16Tests = TestList (map test (roundTrips ++ rejections))
  where
    roundTrips =
      [ EncodingTest UTF16BE "z" [0x00,0x7A]
      , EncodingTest UTF16BE "\x6C34" [0x6C,0x34]
      , EncodingTest UTF16BE "\x1D11E" [0xD8,0x34,0xDD,0x1E]
        -- The expected bytes for plain UTF16 start with 0xFE,0xFF.
      , EncodingTest UTF16 "\x6C34z\x1D11E"
          [0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E]
      , EncodingTest UTF16BE "˨" [0x02,0xE8]
      ]

    rejections =
      [ DecodingError UTF16LE [0x65,0xDC] (IllegalCharacter 0xDC)
      , DecodingError UTF16BE [0xDC,0x33] (IllegalCharacter 0xDC)
      , DecodingError UTF16BE [0xD9,0x78,0xDA,0x33] (IllegalCharacter 0xDA)
      , DecodingError UTF16BE [0xD9,0x78,0xDA,0x66] (IllegalCharacter 0xDA)
      ]

-- | HUnit cases for the @punycode@ encoding: a few hand-written samples
-- followed by the sample strings listed in RFC 3492, section 7.
punycodeTests :: Test
punycodeTests = TestList (map test (adHocCases ++ rfcCases))
  where
    -- Build a case from a Unicode string and its expected output, the latter
    -- written as a plain string and converted via the code point of each
    -- character.
    punyTest input expected =
      EncodingTest punycode input (map (fromIntegral . ord) expected)

    adHocCases =
      [ punyTest "abcdef" "abcdef-"
      , punyTest "abæcdöef" "abcdef-qua4k"
      , punyTest "schön" "schn-7qa"
      , punyTest "ยจฆฟคฏข" "22cdfh1b8fsa"
      , punyTest "☺" "74h"
      ]

    -- taken from http://tools.ietf.org/html/rfc3492#section-7
    rfcCases =
      [ -- Arabic (Egyptian)
        punyTest "ليهمابتكلموشعربي؟"
          "egbpdaj6bu4bxfgehfvwxn"
        -- Chinese (simplified)
      , punyTest "他们为什么不说中文"
          "ihqwcrb4cv8a8dqg056pqjye"
        -- Chinese (traditional)
      , punyTest "他們爲什麽不說中文"
          "ihqwctvzc91f659drss3x8bo0yb"
        -- Czech
      , punyTest "Pročprostěnemluvíčesky"
          "Proprostnemluvesky-uyb24dma41a"
        -- Hebrew
      , punyTest "למההםפשוטלאמדבריםעברית"
          "4dbcagdahymbxekheh6e0a7fei0b"
        -- Hindi (Devanagari)
      , punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902"
          "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
        -- Japanese (kanji and hiragana)
      , punyTest "なぜみんな日本語を話してくれないのか"
          "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
        -- Korean (Hangul syllables)
      , punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까"
          "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
        -- Russian (Cyrillic)
      , punyTest "почемужеонинеговорятпорусски"
          "b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there
        -- Spanish
      , punyTest "PorquénopuedensimplementehablarenEspañol"
          "PorqunopuedensimplementehablarenEspaol-fmd56a"
        -- Vietnamese
      , punyTest "TạisaohọkhôngthểchỉnóitiếngViệt"
          "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
      {-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\
          \bar\xE2\x80\x8B\xE2\x81\xA0\
          \baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
          "foobarbaz"-}
      ]

-- | HUnit cases for 'ISO88592' and 'ISO2022JP', each pairing a string with
-- the byte sequence expected for it.
isoTests :: Test
isoTests = TestList (map test [latin2Case, iso2022jpCase])
  where
    -- 26 characters paired with the contiguous byte range 0xA1..0xBA.
    latin2Case =
      EncodingTest ISO88592
        "\x104\x2D8\x141\xA4\x13D\x15A\xA7\xA8\x160\x15E\x164\x179\xAD\x17D\x17B\xB0\x105\x2DB\x142\xB4\x13E\x15B\x2C7\xB8\x161\x15F"
        [0xA1..0xBA]

    -- Taken from GNU iconv library
    iso2022jpCase =
      EncodingTest ISO2022JP
        "Japanese (\x65E5\x672C\x8A9E)\t\t\x3053\x3093\x306B\x3061\x306F\n\tJIS  -- \x5143\x6C17  \x958B\x767A\n"
        [0x4a,0x61,0x70,0x61,0x6e,0x65,0x73,0x65,0x20,0x28,0x1b,0x24,0x42,0x46,0x7c,0x4b
        ,0x5c,0x38,0x6c,0x1b,0x28,0x42,0x29,0x09,0x09,0x1b,0x24,0x42,0x24,0x33,0x24,0x73
        ,0x24,0x4b,0x24,0x41,0x24,0x4f,0x1b,0x28,0x42,0x0a,0x09,0x4a,0x49,0x53,0x20,0x20
        ,0x2d,0x2d,0x20,0x1b,0x24,0x42,0x38,0x35,0x35,0x24,0x1b,0x28,0x42,0x20,0x20,0x1b
        ,0x24,0x42,0x33,0x2b,0x48,0x2f,0x1b,0x28,0x42,0x0a]

-- | Single HUnit case for 'JISX0208': U+4E9C paired with the bytes 0x30 0x21.
jisTests :: Test
jisTests = TestList [test (EncodingTest JISX0208 "\x4E9C" [0x30,0x21])]

-- | Single HUnit case for 'GB18030', built from a table of sample characters
-- and the bytes expected for each.
--
-- The table is flattened into one string and one byte list, so the whole
-- table is checked as a single 'EncodingTest'.
gb18030Tests :: Test
gb18030Tests =
  TestList
    [test (EncodingTest GB18030 (map fst samples) (concatMap snd samples))]
  where
    -- (character, expected bytes), in the order they appear in the test input.
    samples =
      [ ('\x0000', [0x00])
      , ('\x003F', [0x3F])
      , ('\x0040', [0x40])
      , ('\x007F', [0x7F])
      , ('\x0080', [0x81,0x30,0x81,0x30])
      , ('\x00BF', [0x81,0x30,0x86,0x37])
      , ('\x00C0', [0x81,0x30,0x86,0x38])
      , ('\x00FF', [0x81,0x30,0x8B,0x37])
      , ('\x0100', [0x81,0x30,0x8B,0x38])
      , ('\x3FFF', [0x82,0x32,0xA6,0x36])
      , ('\x4000', [0x82,0x32,0xA6,0x37])
      , ('\x7FFF', [0xC2,0x52])
      , ('\x8000', [0xD2,0xAB])
      , ('\xBFFF', [0x83,0x31,0xD7,0x34])
      , ('\xC000', [0x83,0x31,0xD7,0x35])
      , ('\xEFFF', [0x83,0x38,0x96,0x36])
      , ('\xF000', [0x83,0x38,0x96,0x37])
      , ('\xFFFF', [0x84,0x31,0xA4,0x39])
      ]

{-big5Tests :: Test
big5Tests = test (EncodingFileTest BIG5 "data/BIG5" "data/BIG5.UTF-8")-}