File: cjk_test

package info (click to toggle)
libunicode-collate-perl 1.31-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 5,492 kB
  • sloc: perl: 28,296; makefile: 3
file content (99 lines) | stat: -rw-r--r-- 2,368 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!perl
# cjk_test: the whole tests for CJK/*.pm
#
# input files: ../Collate/CJK/*.pm
#
# * All successful when the output is as below:
#
# OK - ja compare
# OK - ja count
# OK - ko compare
# OK - ko count
# OK - zh_big5 compare
# OK - zh_big5 count
# OK - zh_gb2312 compare
# OK - zh_gb2312 count
# OK - zh_pinyin compare
# OK - zh_pinyin count
# OK - zh_stroke compare
# OK - zh_stroke count
# OK - zh_zhuyin compare
# OK - zh_zhuyin count
#
use strict;

use File::Spec;
use Unicode::Collate::Locale;

my %file = qw(
    zh_big5     Big5
    zh_gb2312   GB2312
    ja          JISX0208
    ko          Korean
    zh_pinyin   Pinyin
    zh_stroke   Stroke
    zh_zhuyin   Zhuyin
);

sub testdata ($$) {
    my $locale = shift;
    my $file   = $file{$locale};
    my $count  = shift;
    my $korean = $locale eq 'ko'; # Hangul <2 Unified Ideographs.

    my $d = File::Spec->updir();
    my $f = File::Spec->catfile($d, 'Collate', 'CJK', "$file.pm");
    open my $fh, "<$f" or die $f;

    my $col = Unicode::Collate::Locale->new(locale => $locale, level => 1);
    my $pre = undef;
    my $prc = '';
    my @err;
    my $done = 0;
    while (<$fh>) {
	if (!defined $pre) {
	   $pre = '' if /^__DATA__/;
	   next;
	}
	last if /^__END__/;
	my @c = split;
	for my $c (@c) {
	    ++$done;
	    $c =~ s/:.*//; # format change, Korean 1.13
	    my $u = pack('U*', map hex, split '-', $c);
	    if ($korean) {
		$col->change(level => 1);
		if (@c != 1 || $c[0] !~ /^[A-D]/) { # not Hangul
		    push @err, "$c($prc)" unless $col->eq($pre, $u);
		    $col->change(level => 2);
		}
	    }
	    push @err, "$c($prc)" unless $col->lt($pre, $u);
	    push @err, "$c(A)" unless $col->lt('A', $u);
	    $pre = $u;
	    $prc = $c;
	}
    }
    print @err            ? "NG @err"  : "OK",   " - $locale compare\n";
    print $count != $done ? "NG $done" : "OK",   " - $locale count\n";
    close $fh;
}

testdata('ja', 6355);
testdata('ko', 7953);
testdata('zh_big5',   13060);
testdata('zh_gb2312',  6768);
testdata('zh_pinyin', 20916); # 2.0 short
testdata('zh_stroke', 23684); # 2.0 short
testdata('zh_zhuyin', 22160); # 22 short

__END__

past test counts:

testdata('zh_pinyin', 20893); # 2.0 short without U+FDD0
testdata('zh_pinyin', 20861); # 1.9.1 short
testdata('zh_pinyin', 20994); # 1.8.1

testdata('zh_stroke', 23647); # 1.9.1 short
testdata('zh_stroke', 13057); # 1.8.1