1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
|
#!/usr/bin/perl
#
# Parse command-line
#
$PATTERN=$ARGV[0];
shift @ARGV;
@TRANSLATIONS = @ARGV;
#
# Parse each input line
#
my %unicodes = (); # label => code for each char matching $PATTARN
my @fallbacks = (); # array of fallback entries, each of which is stored as
# a hash whose keys are "labels" and "codes", and values
# are refs to arrays.
UNICODE: while (<STDIN>) {
# ($code, $label, $categ, $comClass,
# $bidiClass, $decompos, $decDigit, $digit,
# $numeric, $mirror, $oldName, $comment,
# $upper, $lower, $title) = split (/;/);
($code, $label, $categ, undef,
undef, undef, undef, undef,
undef, undef, undef, undef,
undef, undef, undef) = split (/;/);
# skip control chars
next UNICODE if (index ($categ, "C") == 0);
# if this line is interesting
if ($label =~ m/$PATTERN/) {
# store the char in the hash for future use
$unicodes{$label} = $code;
# create a new fallback entry
unshift (@fallbacks, {});
$fallbacks[0]{labels} = [$label];
# compute the accepted transformed char-labels
for ($j = 0; $j <= $#TRANSLATIONS; $j++) {
$transl = $label;
$transl =~ s/$PATTERN/eval"\"$TRANSLATIONS[$j]\""/e;
push (@{$fallbacks[0]{labels}}, $transl);
}
next UNICODE; # don't try to match with a smaller pattern
}
}
# use Data::Dumper;
# print (Dumper(\@fallbacks));
# exit 0;
# process collected data into .fallback format
FB_ENTRY: foreach $fallback (@fallbacks) {
# cleanup fallback line
my $tmp = []; # clean version of $fallback->{labels}
my $code;
foreach $char (@{$fallback->{labels}}) {
if (($code = $unicodes{$char}) and not (grep (/$code/, @{$fallback->{codes}}))) {
push (@$tmp, $char);
push (@{$fallback->{codes}}, $code);
}
}
$fallback->{labels} = $tmp;
# if fallback entry only has one char (ie. no fallback), drop it
if ($#{$fallback->{codes}} == 0) {
next FB_ENTRY;
}
# describing comment line
foreach $char (@{$fallback->{labels}}) {
printf ("# %s ", $char);
}
print "\n";
# the entry itself
foreach $char (@{$fallback->{labels}}) {
printf ("U+%s ", $unicodes{$char});
}
print "\n";
}
|