#! /usr/local/bin/perl -w
# vim: tabstop=4
# vim: syntax=perl
use strict;

use Test;

BEGIN {
    plan tests => 7;
}

use Locale::Recode;

# Forward declaration; the helper is defined after the test body.
sub int2utf8;

# Mapping tables built from the __DATA__ section:
#   $local2ucs: ISO_5428 byte value -> UCS code point
#   $ucs2local: UCS code point      -> ISO_5428 byte value
# Entries mapping to 0xfffd are left out of the reverse table.
my $local2ucs = {};
my $ucs2local = {};

while (my $line = <DATA>) {
    # Each record is "<code> <ucs>"; anything with fewer than two
    # fields (blank lines, stray residue) is not a record and is skipped.
    my ($code_field, $ucs_field) = split ' ', $line;
    next unless defined $ucs_field;

    my ($code, $ucs) = map { oct $_ } ($code_field, $ucs_field);
    $local2ucs->{$code} = $ucs;
    $ucs2local->{$ucs} = $code unless $ucs == 0xfffd;
}

# Tests 1-3: the converters can be constructed without error.
my $cd_int = Locale::Recode->new (from => 'ISO_5428',
                                  to => 'INTERNAL');
ok !$cd_int->getError;

my $cd_utf8 = Locale::Recode->new (from => 'ISO_5428',
                                   to => 'UTF-8');
ok !$cd_utf8->getError;

my $cd_rev = Locale::Recode->new (from => 'INTERNAL',
                                  to => 'ISO_5428');
ok !$cd_rev->getError;

# Test 4: convert into internal representation.
# Iterating over sorted keys (instead of each + last) keeps the order
# deterministic and avoids leaving a half-consumed hash iterator behind
# for the next loop over the same hash.
my $result_int = 1;
for my $code (sort { $a <=> $b } keys %$local2ucs) {
    my $ucs = $local2ucs->{$code};
    my $outbuf = chr $code;
    my $result = $cd_int->recode ($outbuf);
    unless ($result && $outbuf->[0] == $ucs) {
        $result_int = 0;
        last;
    }
}
ok $result_int;

# Test 5: convert to UTF-8.
my $result_utf8 = 1;
for my $code (sort { $a <=> $b } keys %$local2ucs) {
    my $ucs = $local2ucs->{$code};
    my $outbuf = chr $code;
    my $result = $cd_utf8->recode ($outbuf);
    unless ($result && $outbuf eq int2utf8 ($ucs)) {
        $result_utf8 = 0;
        last;
    }
}
ok $result_utf8;

# Test 6: convert from internal representation.
# The outcome is tracked in its own flag so that this test reports on
# the reverse conversion rather than repeating the result of test 4.
my $result_rev = 1;
for my $ucs (sort { $a <=> $b } keys %$ucs2local) {
    my $code = $ucs2local->{$ucs};
    my $outbuf = [ $ucs ];
    my $result = $cd_rev->recode ($outbuf);
    unless ($result && $code == ord $outbuf) {
        $result_rev = 0;
        last;
    }
}
ok $result_rev;

# Test 7: check handling of unknown characters.
# $test_string1 is ' Supergirl ' with the first and last code points
# replaced by 0xad0be; after recoding it must equal the recoded
# 'Supergirl' wrapped in the replacement character.
my $test_string1 = [ unpack 'c*', ' Supergirl ' ];
$test_string1->[0] = 0xad0be;
$test_string1->[-1] = 0xad0be;
my $test_string2 = [ unpack 'c*', 'Supergirl' ];
my $unknown = "\x3f"; # Unknown character!

my $cd_unknown = Locale::Recode->new (from => 'INTERNAL',
                                      to => 'ISO_5428');
my $result_unknown = !$cd_unknown->getError
    && $cd_unknown->recode ($test_string1)
    && $cd_unknown->recode ($test_string2);

ok $result_unknown
    && $test_string1 eq $unknown . $test_string2 . $unknown;
# Encode one integer code point as a UTF-8 byte string.
#
# Takes a single non-negative integer and returns the encoded bytes as
# built by chr/pack "C*".  Besides the 1-4 byte forms, the historical
# 5- and 6-byte forms (lead bytes 0xf8.. and 0xfc..) are produced for
# values above 0x1fffff.  Values that do not fit into 31 bits fall into
# the 6-byte branch and are truncated by the bit masks.
sub int2utf8
{
    my $ucs4 = shift;

    if ($ucs4 <= 0x7f) {
        # 0xxxxxxx
        return chr $ucs4;
    } elsif ($ucs4 <= 0x7ff) {
        # 110xxxxx 10xxxxxx
        return pack ("C2",
                     (0xc0 | (($ucs4 >> 6) & 0x1f)),
                     (0x80 | ($ucs4 & 0x3f)));
    } elsif ($ucs4 <= 0xffff) {
        # 1110xxxx 10xxxxxx 10xxxxxx
        return pack ("C3",
                     (0xe0 | (($ucs4 >> 12) & 0xf)),
                     (0x80 | (($ucs4 >> 6) & 0x3f)),
                     (0x80 | ($ucs4 & 0x3f)));
    } elsif ($ucs4 <= 0x1fffff) {
        # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return pack ("C4",
                     (0xf0 | (($ucs4 >> 18) & 0x7)),
                     (0x80 | (($ucs4 >> 12) & 0x3f)),
                     (0x80 | (($ucs4 >> 6) & 0x3f)),
                     (0x80 | ($ucs4 & 0x3f)));
    } elsif ($ucs4 <= 0x3ffffff) {
        # 111110xx followed by four continuation bytes; the lead marker
        # is 0xf8 (0xf0 would collide with the 4-byte form).
        return pack ("C5",
                     (0xf8 | (($ucs4 >> 24) & 0x3)),
                     (0x80 | (($ucs4 >> 18) & 0x3f)),
                     (0x80 | (($ucs4 >> 12) & 0x3f)),
                     (0x80 | (($ucs4 >> 6) & 0x3f)),
                     (0x80 | ($ucs4 & 0x3f)));
    } else {
        # 1111110x followed by five continuation bytes; only one payload
        # bit lives in the lead byte, and each continuation byte carries
        # six bits.
        return pack ("C6",
                     (0xfc | (($ucs4 >> 30) & 0x1)),
                     (0x80 | (($ucs4 >> 24) & 0x3f)),
                     (0x80 | (($ucs4 >> 18) & 0x3f)),
                     (0x80 | (($ucs4 >> 12) & 0x3f)),
                     (0x80 | (($ucs4 >> 6) & 0x3f)),
                     (0x80 | ($ucs4 & 0x3f)));
    }
}
#Local Variables:
#mode: perl
#perl-indent-level: 4
#perl-continued-statement-offset: 4
#perl-continued-brace-offset: 0
#perl-brace-offset: -4
#perl-brace-imaginary-offset: 0
#perl-label-offset: -4
#tab-width: 4
#End:
__DATA__
0x00 0x0000
0x01 0x0001
0x02 0x0002
0x03 0x0003
0x04 0x0004
0x05 0x0005
0x06 0x0006
0x07 0x0007
0x08 0x0008
0x09 0x0009
0x0a 0x000a
0x0b 0x000b
0x0c 0x000c
0x0d 0x000d
0x0e 0x000e
0x0f 0x000f
0x10 0x0010
0x11 0x0011
0x12 0x0012
0x13 0x0013
0x14 0x0014
0x15 0x0015
0x16 0x0016
0x17 0x0017
0x18 0x0018
0x19 0x0019
0x1a 0x001a
0x1b 0x001b
0x1c 0x001c
0x1d 0x001d
0x1e 0x001e
0x1f 0x001f
0x20 0x0020
0x21 0xe002
0x22 0xe003
0x23 0xe009
0x24 0xe005
0x25 0xe012
0x26 0xe013
0x27 0xe014
0x30 0xfffd
0x30 0xfffd
0x30 0xfffd
0x30 0xfffd
0x30 0xfffd
0x30 0xfffd
0x30 0xfffd
0x30 0xfffd
0x30 0x00ab
0x31 0x00bb
0x32 0x201d
0x33 0x201c
0x34 0x0374
0x35 0x0375
0x3b 0xfffd
0x3b 0xfffd
0x3b 0xfffd
0x3b 0xfffd
0x3b 0xfffd
0x3b 0x00b7
0x3f 0xfffd
0x3f 0xfffd
0x3f 0xfffd
0x3f 0x003b
0x41 0xfffd
0x41 0x0391
0x42 0x0392
0x44 0xfffd
0x44 0x0393
0x45 0x0394
0x46 0x0395
0x47 0x03da
0x48 0x03dc
0x49 0x0396
0x4a 0x0397
0x4b 0x0398
0x4c 0x0399
0x4d 0x039a
0x4e 0x039b
0x4f 0x039c
0x50 0x039d
0x51 0x039e
0x52 0x039f
0x53 0x03a0
0x54 0x03de
0x55 0x03a1
0x56 0x03a3
0x58 0xfffd
0x58 0x03a4
0x59 0x03a5
0x5a 0x03a6
0x5b 0x03a7
0x5c 0x03a8
0x5d 0x03a9
0x5e 0x03e0
0x61 0xfffd
0x61 0xfffd
0x61 0x03b1
0x62 0x03b2
0x63 0x03d0
0x64 0x03b3
0x65 0x03b4
0x66 0x03b5
0x67 0xe01a
0x68 0xe01b
0x69 0x03b6
0x6a 0x03b7
0x6b 0x03b8
0x6c 0x03b9
0x6d 0x03ba
0x6e 0x03bb
0x6f 0x03bc
0x70 0x03bd
0x71 0x03be
0x72 0x03bf
0x73 0x03c0
0x74 0xe01c
0x75 0x03c1
0x76 0x03c3
0x77 0x03c2
0x78 0x03c4
0x79 0x03c5
0x7a 0x03c6
0x7b 0x03c7
0x7c 0x03c8
0x7d 0x03c9
0x7e 0xe01d
0x7f 0x007f
0x80 0xfffd
0x81 0xfffd
0x82 0xfffd
0x83 0xfffd
0x84 0xfffd
0x85 0xfffd
0x86 0xfffd
0x87 0xfffd
0x88 0xfffd
0x89 0xfffd
0x8a 0xfffd
0x8b 0xfffd
0x8c 0xfffd
0x8d 0xfffd
0x8e 0xfffd
0x8f 0xfffd
0x90 0xfffd
0x91 0xfffd
0x92 0xfffd
0x93 0xfffd
0x94 0xfffd
0x95 0xfffd
0x96 0xfffd
0x97 0xfffd
0x98 0xfffd
0x99 0xfffd
0x9a 0xfffd
0x9b 0xfffd
0x9c 0xfffd
0x9d 0xfffd
0x9e 0xfffd
0x9f 0xfffd
0xa0 0xfffd
0xa1 0xfffd
0xa2 0xfffd
0xa3 0xfffd
0xa4 0xfffd
0xa5 0xfffd
0xa6 0xfffd
0xa7 0xfffd
0xa8 0xfffd
0xa9 0xfffd
0xaa 0xfffd
0xab 0xfffd
0xac 0xfffd
0xad 0xfffd
0xae 0xfffd
0xaf 0xfffd
0xb0 0xfffd
0xb1 0xfffd
0xb2 0xfffd
0xb3 0xfffd
0xb4 0xfffd
0xb5 0xfffd
0xb6 0xfffd
0xb7 0xfffd
0xb8 0xfffd
0xb9 0xfffd
0xba 0xfffd
0xbb 0xfffd
0xbc 0xfffd
0xbd 0xfffd
0xbe 0xfffd
0xbf 0xfffd
0xc0 0xfffd
0xc1 0xfffd
0xc2 0xfffd
0xc3 0xfffd
0xc4 0xfffd
0xc5 0xfffd
0xc6 0xfffd
0xc7 0xfffd
0xc8 0xfffd
0xc9 0xfffd
0xca 0xfffd
0xcb 0xfffd
0xcc 0xfffd
0xcd 0xfffd
0xce 0xfffd
0xcf 0xfffd
0xd0 0xfffd
0xd1 0xfffd
0xd2 0xfffd
0xd3 0xfffd
0xd4 0xfffd
0xd5 0xfffd
0xd6 0xfffd
0xd7 0xfffd
0xd8 0xfffd
0xd9 0xfffd
0xda 0xfffd
0xdb 0xfffd
0xdc 0xfffd
0xdd 0xfffd
0xde 0xfffd
0xdf 0xfffd
0xe0 0xfffd
0xe1 0xfffd
0xe2 0xfffd
0xe3 0xfffd
0xe4 0xfffd
0xe5 0xfffd
0xe6 0xfffd
0xe7 0xfffd
0xe8 0xfffd
0xe9 0xfffd
0xea 0xfffd
0xeb 0xfffd
0xec 0xfffd
0xed 0xfffd
0xee 0xfffd
0xef 0xfffd
0xf0 0xfffd
0xf1 0xfffd
0xf2 0xfffd
0xf3 0xfffd
0xf4 0xfffd
0xf5 0xfffd
0xf6 0xfffd
0xf7 0xfffd
0xf8 0xfffd
0xf9 0xfffd
0xfa 0xfffd
0xfb 0xfffd
0xfc 0xfffd
0xfd 0xfffd
0xfe 0xfffd
0xff 0xfffd