File: gen_unicode_test.tcl

package info (click to toggle)
tcllib 1.20%2Bdfsg-1
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 68,064 kB
  • sloc: tcl: 216,842; ansic: 14,250; sh: 2,846; xml: 1,766; yacc: 1,145; pascal: 881; makefile: 107; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (247 lines) | stat: -rw-r--r-- 7,421 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#!/usr/bin/tclsh

# gen_unicode_test.tcl --
#
#	This program parses the RFC 3454 file and generates the
#	corresponding unicode.test file with unicode package tests.
#	The input to this program should be NormalizationTest.txt.
#	It can be downloaded from:
#	ftp://ftp.unicode.org/Public/UNIDATA/NormalizationTest.txt
#	Short test suite is generated by default. If you want to generate
#	all tests (more than 300000 test cases) add suffix 'full' as the
#	third argument.
#
# Usage: gen_unicode_test.tcl infile outdir ?full?
# 
# RCS: @(#) $Id: gen_unicode_test.tcl,v 1.1 2008/01/29 02:18:10 patthoyts Exp $

package require struct::list

set short_test_list [list \
    "LATIN CAPITAL LETTER D, COMBINING DOT ABOVE, COMBINING DOT BELOW" \
    "NO-BREAK SPACE" \
    "VULGAR FRACTION ONE HALF" \
    "ORIYA LETTER RRA" \
    "KANNADA VOWEL SIGN EE" \
    "TIBETAN LETTER GHA" \
    "MODIFIER LETTER CAPITAL A" \
    "GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA" \
    "KANGXI RADICAL SPROUT" \
    "HIRAGANA LETTER DE" \
    "KATAKANA LETTER PA" \
    "HANGUL LETTER SIOS-PIEUP" \
    "HANGUL SYLLABLE GYANG" \
    "CJK COMPATIBILITY IDEOGRAPH-F98E" \
    "ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM" \
    "ARABIC LIGATURE AIN WITH JEEM ISOLATED FORM" \
    "FULLWIDTH DIGIT THREE" \
    "LATIN SMALL LETTER A, COMBINING CYRILLIC TITLO, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B" \
    "LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, COMBINING RING OVERLAY, LATIN SMALL LETTER B" \
    "HANGUL SYLLABLE BYO, COMBINING TILDE OVERLAY, HANGUL JONGSEONG TIKEUT"]

set fd [open [lindex $argv 0]]

set all_tests {}
set n 0
while {[gets $fd line] >= 0} {
    set line [string trim $line]
    if {![regexp \
	      {^([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);.*\) (.*)} \
	      $line -> c(1) c(2) c(3) c(4) c(5) title]} continue

    set q 1
    foreach i {1 2 3 4 5} {
	set s($i) {}
	set us($i) ""
	foreach xnum $c($i) {
	    set uc [scan $xnum %x]
	    if {$uc > 0xffff} {
		set q 0
	    }
	    lappend s($i) $uc
	    append us($i) \\u$xnum
	}
    }
    if {!$q} {
	# Test case contains character which is greater than 0xFFFF and can't
	# be represented in Tcl
	continue
    }
    set test($n) [list $s(1) $s(2) $s(3) $s(4) $s(5) $title]
    set test1($n) [list $us(1) $us(2) $us(3) $us(4) $us(5) $title]
    if {[lsearch $short_test_list $title] >= 0} {
	lappend all_tests $n
    }
    incr n
}

close $fd

if {[string equal [lindex $argv 2] full]} {
    set all_tests [struct::list iota $n]
}

set f [open [file join [lindex $argv 1] unicode.test] w]
fconfigure $f -translation lf
puts $f \
"# unicode.test
#
# Tests for the unicode package. This file is automatically generated by
# the gen_unicode_test.tcl script. Do not modify this file by hands.
#
# RCS: @(#) \$Id\$

# -------------------------------------------------------------------------

source \[file join \\
	\[file dirname \[file dirname \[file join \[pwd\] \[info script\]\]\]\] \\
	devtools testutilities.tcl\]

testsNeedTcl     8.3
testsNeedTcltest 1.0

testing {
    useLocalFile unicode_data.tcl
    useLocalFile unicode.tcl
}

# -------------------------------------------------------------------------
"

set j 0
foreach i $all_tests {
    puts $f \
"
test unicode-1.[incr j] {normalizeS D: [lindex $test1($i) 5]} {
    unicode::normalizeS D \"[lindex $test1($i) 0]\"
} \"[lindex $test1($i) 2]\"

test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
    unicode::normalize D [list [lindex $test($i) 1]]
} {[lindex $test($i) 2]}

test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
    unicode::normalize D [list [lindex $test($i) 2]]
} {[lindex $test($i) 2]}

test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
    unicode::normalize D [list [lindex $test($i) 3]]
} {[lindex $test($i) 4]}

test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
    unicode::normalize D [list [lindex $test($i) 4]]
} {[lindex $test($i) 4]}
"
}

set j 0
foreach i $all_tests {
    puts $f \
"
test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
    unicode::normalize C [list [lindex $test($i) 0]]
} {[lindex $test($i) 1]}

test unicode-2.[incr j] {normalizeS C: [lindex $test1($i) 5]} {
    unicode::normalizeS C \"[lindex $test1($i) 1]\"
} \"[lindex $test1($i) 1]\"

test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
    unicode::normalize C [list [lindex $test($i) 2]]
} {[lindex $test($i) 1]}

test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
    unicode::normalize C [list [lindex $test($i) 3]]
} {[lindex $test($i) 3]}

test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
    unicode::normalize C [list [lindex $test($i) 4]]
} {[lindex $test($i) 3]}
"
}

set j 0
foreach i $all_tests {
    puts $f \
"
test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
    unicode::normalize KD [list [lindex $test($i) 0]]
} {[lindex $test($i) 4]}

test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
    unicode::normalize KD [list [lindex $test($i) 1]]
} {[lindex $test($i) 4]}

test unicode-3.[incr j] {normalizeS KD: [lindex $test1($i) 5]} {
    unicode::normalizeS KD \"[lindex $test1($i) 2]\"
} \"[lindex $test1($i) 4]\"

test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
    unicode::normalize KD [list [lindex $test($i) 3]]
} {[lindex $test($i) 4]}

test unicode-1.[incr j] {normalize KD: [lindex $test($i) 5]} {
    unicode::normalize KD [list [lindex $test($i) 4]]
} {[lindex $test($i) 4]}
"
}

set j 0
foreach i $all_tests {
    puts $f \
"
test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
    unicode::normalize KC [list [lindex $test($i) 0]]
} {[lindex $test($i) 3]}

test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
    unicode::normalize KC [list [lindex $test($i) 1]]
} {[lindex $test($i) 3]}

test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
    unicode::normalize KC [list [lindex $test($i) 2]]
} {[lindex $test($i) 3]}

test unicode-4.[incr j] {normalizeS KC: [lindex $test1($i) 5]} {
    unicode::normalizeS KC \"[lindex $test1($i) 3]\"
} \"[lindex $test1($i) 3]\"

test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
    unicode::normalize KC [list [lindex $test($i) 4]]
} {[lindex $test($i) 3]}
"
}

puts $f \
"
test unicode-5.1 {fromstring} {
    unicode::fromstring \"\\u0403\\u0405\\u0406\\u041f\\u0034\"
} {1027 1029 1030 1055 52}

test unicode-5.2 {fromstring} {
    unicode::fromstring \"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u0008\\u0009\\u000a\\u000b\\u000c\\u000d\"
} {1 2 3 4 5 6 7 8 9 10 11 12 13}

test unicode-6.1 {tostring} {
    unicode::tostring {16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1}
} \"\\u0010\\u000f\\u000e\\u000d\\u000c\\u000b\\u000a\\u0009\\u0008\\u0007\\u0006\\u0005\\u0004\\u0003\\u0002\\u0001\"

test unicode-6.2 {tostring} {
    unicode::tostring {12345 12346 12347 12348 12349 12350 12351}
} \"\\u3039\\u303a\\u303b\\u303c\\u303d\\u303e\\u303f\"

test unicode-7.1 {normalize bad form} {
    catch {unicode::normalize S \"\"} result
    set result
} \"::unicode::normalize: Only D, C, KD and KC forms are allowed\"

test unicode-8.1 {normalizeS bad form} {
    catch {unicode::normalizeS S \"\"} result
    set result
} \"::unicode::normalizeS: Only D, C, KD and KC forms are allowed\"

::tcltest::cleanupTests
"

close $f