File: make_rfc1345_maps

package info (click to toggle)
libunicode-map8-perl 0.13%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 996 kB
  • sloc: perl: 495; ansic: 439; sh: 6; makefile: 2
file content (133 lines) | stat: -rwxr-xr-x 2,889 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/perl -w

open(RFC, "rfc1345.txt") || die;

my %mnemonics;
my %fullname;
my %aliases;

my $charset;
my @aliases;
my $code;
my @map;

unless (-d "maps") {
    print STDERR "Making directory 'maps'\n";
    mkdir('maps', 0777) || die "Can't create directory: $!";
}

if (open(ALIASES, "maps/aliases")) {
    while (<ALIASES>) {
	next if /^\s*\#/;
	next if /^\s*$/;
	chomp;
	my($charset, @aliases) = split(' ', $_);
	$aliases{$charset} = { map {$_ => 1} @aliases };
    }
}

while (<RFC>) {
    if (/^3\.\s/ .. /^4\.\s/) {  # only want chapter 3
	if (/^ (\S+)\s+([0-9a-f]{4})\s+(.+)/) {
	    $mnemonics{$1} = $2;
	    $fullname{$3}  = $2;
	}
    }
    if (/^5\.\s/ .. /^ACKNOWLEDGEMENTS/) {
	if (/^  &charset\s+(\S+)/) {
	    #print "$1\n";
	    map_out();
	    $charset = $1;
	    @aliases = ();
	    undef($code);
	    @map = ();
	} elsif (/^  &alias\s+(\S+)/) {
	    #print " $1\n";
	    push(@aliases, $1);
	} elsif (/^  &bits\s+(\d+)/) {
	    #print " BITS = $1\n";
	    if ($1 ne '8') {
		undef($charset);  # don't care about this one
	    }
	} elsif (/^  &code\s+(\d+)/) {
	    #print " CODE=$1\n";
	    $code = $1;
	} elsif (/^  &duplicate\s+(\d+)\s+(\S+)/) {
	    #print "DUP $1 $2\n";
	    push(@map, [$1, $2]);
	} elsif (/^  &([a-z][a-z0-9]+)/) {
	    #print "$1\n";
	} elsif (/^  (\S+ +.*)/ && $charset && defined($code)) {
	    my $mne;
	    for $mne (split(' ', $1)) {
		if ($mne eq "??") {
		    $code++;
		    next;
		} else {
		    if ($code > 255) {
			print STDERR "$charset: bad code $code\n";
			undef($charset);  # ignore it
			last;
		    }
		    push(@map, [$code++, $mne]);
		}
	    }
	}
    }
}
map_out();

open(ALIASES, ">maps/aliases") || die "Can't write aliases: $!";
for (sort keys %aliases) {
    delete $aliases{$_}{$_};  # if we managed to get an alias to ourself
    print ALIASES "$_ ", join(" ", sort keys %{$aliases{$_}}), "\n";
}
close(ALIASES);



sub map_out
{
    return unless $charset;

    while ($charset =~ /[^\w\-\.]/) {
	my $orig = $charset;
	if ($charset =~ s/:\d+$// ||
	    $charset =~ s/_\(\d+\)$//)
	{
	    push(@aliases, $orig);
	} else {
            die "Can't wash $charset\n";
        }
    }
    if ($charset =~ /^ISO_8859-(\d+)$/) {
	push(@aliases, "iso8859-$1", "8859-$1");
	# Fix the MACRON vs. OVERLINE bug in these encodings
	if ($1 == 1 || $1 == 4 || $1 == 9) {
	    splice(@map, 0xAF, 0, [0xAF, "'m"]);
	}
    }

    print STDERR "$charset @aliases\n";
    for (@aliases) {
	$aliases{$charset}{$_}++;
    }

    open(BINMAP, "| perl ./map8_txt2bin >maps/$charset.bin") or die;
    binmode BINMAP;
    for (@map) {
	my($code, $mne) = @$_;
	my $x4 = $mnemonics{$mne};
	unless ($x4) {
	    print STDERR "$charset: no code for $mne\n";
	    next;
	}
	if ($code < 0 || $code > 255) {
	    print STDERR "$charset: bad code $code\n";
	    next;
	}
	printf BINMAP "0x%02x 0x%s\n", $code, $x4;
    }
    close(BINMAP);
    
}