1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
|
#!/usr/bin/perl
# This perl script is used for converting the hopefully-user-readable
# *.in files into the somewhat complicated *.pl scripts
# Copyright (C) 2004, 2005 Kevin P. Scannell <kscanne@gmail.com>
#
# This is free software; see the file COPYING for copying conditions. There is
# NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
my $attrtags = '['.$ARGV[0].']';
my $noattrtags = '[^BEZ'.$ARGV[0].']';
while (<STDIN>) {
if ( /^[^#]/ ) {
chomp;
my ($rule, $action) = m/^([^:]+):(.*)$/;
my $ans;
$rule =~ s/\(/(?:/g;
$rule =~ s/\/>/\\\/>/g;
$rule =~ s/^/ /;
$rule =~ s/$/ /;
# loop over tokens in the rule; the optional <E>,</E> tokens
# are from the OK lines in rialacha-xx.in
while ($rule =~ m/(?<= )(<E>)?(<B><Z>(?:(?:<[^>]+>)+|[^<>]+)<\/Z>[^<]+<\/B>|<[^\/][^>]*>[^<]+<\/[^>]+>|[^<> ]+)(<\/E>)?(?= )/g) {
my $aon = $1;
my $tok = $2;
my $tri = $3;
# e.g. <[^N]>ANYTHING</[^N]>
if ($tok =~ m/^<\[\^/) {
$tok =~ s/<\[\^([A-DF-Z]+)\]>([^<]+)<\/[^>]*>/(?:<[^\\\/$1][^>]*>$2<\\\/[^$1]>|<B><Z>(?:<[^$1][^>]*>)+<\\\/Z>$2<\\\/B>)/;
}
# e.g. <[AN]>ANYTHING</[AN]>
elsif ($tok =~ m/^<\[[^\^]/) {
$tok =~ s/<\[([A-DF-Z]+)\]>([^<]+)<\/[^>]*>/(?:<[$1][^>]*>$2<\\\/[$1]>|<B><Z>(?:<[$1][^>]*>)+<\\\/Z>$2<\\\/B>)/;
}
elsif ($tok =~ m/^<[A-Z]/) {
# e.g. <C>
$tok =~ s/<($noattrtags)>([^<]+)<\/[A-Z]>/<$1>$2<\\\/$1>/;
# e.g. <N pl="n" gnt="n">
$tok =~ s/<($attrtags)( [^>]+)>([^<]+)<\/[A-Z][^>]*>/<${1}${2}>$3<\\\/$1>/;
# e.g. <N>
$tok =~ s/<($attrtags)>([^<]+)<\/[A-Z][^>]*>/(?:<${1}[^>]*>$2<\\\/$1>|<B><Z>(?:<${1}[^>]*>)+<\\\/Z>$2<\\\/B>)/;
# e.g. <NCS>, or (see wa) <NCS h="y">
$tok =~ s/<($attrtags)([^ >][^>]*)>([^<]+)<\/[A-Z][^>]*>/(?:<${1}[^>]*${2}[^>]*>$3<\\\/$1>|<B><Z>(?:<${1}[^>]*${2}[^>]*>)+<\\\/Z>$3<\\\/B>)/;
}
# e.g. barewords or macros with no tags
else {
$tok =~ s/([^<> ]+)/(?:<[\\\/A-DF-Z][^>]*>)+$1<\\\/[A-DF-Z]>/;
$tok =~ s/\+([^(<]*\|[^(<]*)</+(?:$1)</g;
}
$tri =~ s/\//\\\//; # </E> -> <\/E>
$ans .= "$aon$tok$tri ";
}
$ans =~ s/ $/:$action/;
print "$ans\n";
}
}
exit;
|