1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
#!/usr/bin/perl
# latin_diacr patterns:
# 'LATIN (SMALL|CAPITAL) (LETTER|LIGATURE) ((LONG|OPEN|DOTLESS) )?([^ ;]+)( DIGRAPH)?( WITH ([^ ;]+)( AND ([ ;]+))?)?'
# 'LATIN $1 $2 $3$5$6 WITH $8'
# 'LATIN (SMALL|CAPITAL) (LETTER|LIGATURE) ((LONG|OPEN|DOTLESS) )?([^ ;]+)( DIGRAPH)?( WITH [^ ;]+)?'
# 'LATIN $1 $2 $3$5$6'
# 'LATIN (SMALL|CAPITAL) (LETTER|LIGATURE) ((LONG|OPEN|DOTLESS) )?[^ ;]+( DIGRAPH)?'
# 'LATIN $1 $2 $5$6'
# 'LATIN (SMALL|CAPITAL) (LETTER|LIGATURE) [^ ;]+( DIGRAPH)?'
# latin_diacr_HOPEFULL_PATTERN='LATIN'
#($FULL_PATTERN, $REDUCTED_PATTERN, $TRANSLATION) = @ARGV;
#printf (" => `%s'\n =>`%s'\n =>`%s'\n", $FULL_PATTERN, $REDUCTED_PATTERN, $TRANSLATION);
#
# Parse command-line: fill @PATTERNS and @TRANSLATION alternatively
#
# @PATTERNS=();
# @TRANSLATIONS=();
# $arg_parity = 0;
# foreach $arg (@ARGV) {
# if ($arg_parity == 1) {
# push @TRANSLATIONS, $arg;
# } else {
# push @PATTERNS, $arg;
# }
# $arg_parity = not $arg_parity;
# }
$PATTERNS=$ARGV[0];
shift @ARGV;
@TRANSLATIONS = @ARGV;
use Data::Dumper;
$Data::Dumper::Indent = 3;
$Data::Dumper::Terse = 1;
#printf ("\@PATTERNS = %s\@TRANSLATIONS = %s\n", Dumper(\@PATTERNS), Dumper(\@TRANSLATIONS));
printf ("\$PATTERNS = %s\@TRANSLATIONS = %s\n", Dumper($PATTERNS), Dumper(\@TRANSLATIONS));
exit 0;
#
# Parse each input line
#
UNICODE: while (<STDIN>) {
# ($code, $label, $categ, $comClass,
# $bidiClass, $decompos, $decDigit, $digit,
# $numeric, $mirror, $oldName, $comment,
# $upper, $lower, $title) = split (/;/);
($code, $label, $categ, undef,
undef, undef, undef, undef,
undef, undef, undef, undef,
undef, undef, undef) = split (/;/);
# skip control chars
next UNICODE if (index ($categ, "C") == 0);
for ($i = 0; $i < $#PATTERNS; $i++) {
if ($label =~ m/^$PATTERNS[$i]$/) {
printf ("# `%s'", $label);
for ($j = $i; $j <= $#TRANSLATIONS; $j++) {
$transl = $label;
$transl =~ s/^$PATTERNS[$i]$/eval"\"$TRANSLATIONS[$j]\""/e;
printf (" : `%s'", $transl);
}
print "\n";
next UNICODE; # don't try to match with a smaller pattern
} else {
printf ("# *SKIPPED* `%s'\n", $label);
}
}
}
|