1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
|
#! /usr/bin/perl
#
# Compile Scripts.txt into C array declarations.
#
# scripts: an array of script names. The last entry will be for "Unknown";
#
# unicode_rangetab:
#
# The array's structure is [firstchar, lastchar], listing unicode character
# range with the same script. firstchar and lastchar is the last byte in the
# character range/
#
# The ranges are sorted in numerical order.
#
# unicode_classtab:
#
# An array of the same size as unicode_rangetab, gives the index of the
# unicode range's script name, in the scripts array. Neither rangetab nor
# classtab will have entries pointing to "Unknown". All unicode characters
# not in rangetab default to "Unknown";
#
# unicode_indextab:
#
# For each group of 256 characters, an index into rangetab/classtab where
# ranges for those groups of 256 characters are start.
#
# unicode_rangetab stores only the low byte of the starting/ending character
# number.
use strict;
use warnings;
use mkcommon;
my $obj=mkcommon->new;
$obj->{proptype}="char *";
open(F, "<Scripts.txt") || die;
my @table;
my %scriptnames;
my $counter=0;
while (defined($_=<F>))
{
chomp;
next unless /^([0-9A-F]+)(\.\.([0-9A-F]+))?\s*\;\s*([^\s]+)\s*/;
my $f=$1;
my $l=$3;
my $s=$4;
$l=$f unless $l;
eval "\$f=0x$f";
eval "\$l=0x$l";
$scriptnames{$s} //= ++$counter;
push @table, [$f, $l, "unicode_script_" . lc($s)];
}
my @repl = map {
"\tunicode_script_" . lc($_) . ",\n";
} sort {
$scriptnames{$a} <=> $scriptnames{$b};
} keys %scriptnames;
unshift @repl, "\tunicode_script_unknown,\n";
$repl[$#repl] =~ s/,//;
open(F, ">courier-unicode-script-tab.h.tmp") or die;
print F join("", @repl);
close(F) or die;
rename("courier-unicode-script-tab.h.tmp", "courier-unicode-script-tab.h") or die;
grep {
$obj->range($$_[0], $$_[1], $$_[2]);
} sort { $$a[0] <=> $$b[0] } @table;
$obj->output;
|