1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Normalize " .
"cannot stringify a Unicode code point\n";
exit 0;
}
}
BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
BEGIN {
unless (5.006001 <= $]) {
print "1..0 # skipped: Perl 5.6.1 or later".
" needed for this test\n";
exit;
}
}
#########################
use Test;
use strict;
use warnings;
BEGIN { plan tests => 34 };
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.
sub _pack_U { Unicode::Normalize::pack_U(@_) }
sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
#########################
our $proc; # before the last starter
our $unproc; # the last starter and after
# If string has no starter, entire string is set to $unproc.
($proc, $unproc) = splitOnLastStarter("");
ok($proc, "");
ok($unproc, "");
($proc, $unproc) = splitOnLastStarter("A");
ok($proc, "");
ok($unproc, "A");
($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0x42));
ok($proc, _pack_U(0x41, 0x300, 0x327));
ok($unproc, "B");
($proc, $unproc) = splitOnLastStarter(_pack_U(0x4E00, 0x41, 0x301));
ok($proc, _pack_U(0x4E00));
ok($unproc, _pack_U(0x41, 0x301));
($proc, $unproc) = splitOnLastStarter(_pack_U(0x302, 0x301, 0x300));
ok($proc, "");
ok($unproc, _pack_U(0x302, 0x301, 0x300));
our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300);
our $dakuten = _pack_U(0x3099);
our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300);
our ($p, $u) = splitOnLastStarter($ka_grave);
our $concat = $p . NFC($u.$dakuten);
ok(NFC($ka_grave.$dakuten) eq $ga_grave);
ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave);
ok($concat eq $ga_grave);
##############
sub arraynorm {
my $form = shift;
my @string = @_;
my $result = "";
my $unproc = "";
foreach my $str (@string) {
$unproc .= $str;
my $n = normalize($form, $unproc);
my($p, $u) = splitOnLastStarter($n);
$result .= $p;
$unproc = $u;
}
$result .= $unproc;
return $result;
}
my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
my $strC = "\x{3CE}\x{AC01}\x{AC03}";
my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
ok($strC eq NFC($strD));
ok($strD eq join('', @str1));
ok($strC eq arraynorm('NFC', @str1));
ok($strD eq join('', @str2));
ok($strC eq arraynorm('NFC', @str2));
my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
my $strX = "\x{300}\x{AC01}";
ok($strX eq NFC(join('', @strX)));
ok($strX eq arraynorm('NFC', @strX));
ok($strX eq NFKC(join('', @strX)));
ok($strX eq arraynorm('NFKC', @strX));
my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
my $strY = ("\x{304C}\x{0323}\x{0308}");
ok($strY eq NFC(join('', @strY)));
ok($strY eq arraynorm('NFC', @strY));
ok($strY eq NFKC(join('', @strY)));
ok($strY eq arraynorm('NFKC', @strY));
my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
ok($strZ eq NFD(join('', @strZ)));
ok($strZ eq arraynorm('NFD', @strZ));
ok($strZ eq NFKD(join('', @strZ)));
ok($strZ eq arraynorm('NFKD', @strZ));
##############
# don't modify the source
my $source = "ABC";
($proc, $unproc) = splitOnLastStarter($source);
ok($proc, "AB");
ok($unproc, "C");
ok($source, "ABC");
|