File: jis0208-extra.pl

package info (click to toggle)
libmoe 1.5.7-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k, lenny, sarge
  • size: 6,772 kB
  • ctags: 267,602
  • sloc: ansic: 478,515; perl: 2,318; makefile: 201; sh: 22
file content (76 lines) | stat: -rw-r--r-- 1,884 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Script to make extra JIS X 0208 <-> Big Five (in Unicode) mapping table
# based on information from Ambrose Li <acli@ada.dhs.org>.
#   perl jis0208-extra.pl /path/to/JIS0208.TXT.extra

use strict;
use warnings;

BEGIN {require 'mbcesdefs.pl'}

# Output file names.  These stay package variables and each one receives its
# default only when it is not already defined, so a value assigned before
# this point is kept.
our ($j2u, $u2j, $html);
$j2u = 'jis0208-to-ucs-extra.mk_btri.h' unless defined($j2u);
$u2j = 'ucs-to-jis0208-extra.mk_btri.h' unless defined($u2j);
$html = '/dev/null' unless defined($html);

# The single command-line argument is the mapping source file.
my $src = shift(@ARGV);
defined($src) or die "usage: perl $0 /path/to/JIS0208.TXT.extra\n";

# Three-argument open: the file name is never interpreted as an open mode.
open(my $src_fh, '<', $src) or die "open(S, \"$src\"): $!";
open(my $html_fh, '>', $html) or die "open(H, \"$html\"): $!";

print {$html_fh} html_begin();

# Characters that must be escaped before the comment text goes into HTML.
my %entity_for = ('<' => '&lt;', '>' => '&gt;', '&' => '&amp;');

# Collected [code, ucs] pairs, in input order.
my @map;

while (my $line = <$src_fh>) {
  # Expected line shape: three 0x-prefixed hex columns and a '#' comment.
  # The first two columns are two bytes each; anything else is skipped.
  next unless $line =~ /^0x([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})\s+0x([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})\s+0x([0-9A-Fa-f]+)\s+(\#.*)/;

  my ($s1, $s2, $j1, $j2, $ucs, $cmt) = (hex($1), hex($2), hex($3), hex($4), hex($5), $6);

  $cmt =~ s/([<>&])/$entity_for{$1}/g;

  # The two bytes of the second column are written between the escape
  # sequences ESC $ B and ESC ( B, followed by the third column as a numeric
  # character reference and the escaped comment.
  printf {$html_fh} "<tr><td>%s%c%c%s</td><td>&#x%04X;</td><td>%s</td></tr>\n", "\x1B\x24\x42", $j1, $j2, "\x1B\x28\x42", $ucs, $cmt;

  # MB_SJIS_ENC comes from mbcesdefs.pl; presumably it packs the two bytes
  # of the first column into one numeric code -- TODO confirm.
  push(@map, [MB_SJIS_ENC($s1, $s2), $ucs]);
}

print {$html_fh} html_end();

# Buffered write errors only surface at close, so check it on the output.
close($html_fh) or die "close(H, \"$html\"): $!";
close($src_fh);

# The table files are opened only after the input has been read completely.
open(my $j2u_fh, '>', $j2u) or die "open(J2U, \">$j2u\"): $!";
open(my $u2j_fh, '>', $u2j) or die "open(U2J, \">$u2j\"): $!";

# Plain print, not printf: the doubled percent signs are written literally.
print {$j2u_fh} "%%TYPE number\n%%BEGIN\n\n";
print {$u2j_fh} "%%TYPE number\n%%BEGIN\n\n";

# Each pair is written once per direction: code,ucs and ucs,code.
for my $pair (@map) {
  my ($code, $ucs) = @$pair;
  printf {$j2u_fh} "0x%X,0x%X\n", $code, $ucs;
  printf {$u2j_fh} "0x%X,0x%X\n", $ucs, $code;
}

close($u2j_fh) or die "close(U2J, \"$u2j\"): $!";
close($j2u_fh) or die "close(J2U, \"$j2u\"): $!";
exit;

# Returns the opening part of the HTML report as one string: doctype, a head
# that declares charset iso-2022-jp, and the start of a bordered table with
# its header row.  Every line, including the last, ends with a newline.
# NOTE(review): the title says "JIS0208.TXT.errata" although the usage comment
# of this script names JIS0208.TXT.extra -- confirm whether that is intended.
sub html_begin {
  my @lines = (
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
    '<html lang="ja">',
    '<head>',
    '<meta http-equiv="Content-Type" content="text/html; charset=iso-2022-jp">',
    '<title>JIS0208.TXT.errata</title>',
    '</head>',
    '<body>',
    '<table border="1">',
    '<tr><td>JIS</td><td>Unicode (Big5)</td><td>Unicode (Big5) name</td></tr>',
  );

  # The trailing empty element makes join terminate the final line as well.
  return join("\n", @lines, '');
}

# Returns the closing part of the HTML report as one string: the end tags for
# the table, the body and the document, one per line.
sub html_end {
  return "</table>\n"
       . "</body>\n"
       . "</html>\n";
}