File: gin.meta.pl

package info (click to toggle)
gramadoir 0.7-7
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 12,628 kB
  • sloc: perl: 11,207; sh: 2,973; xml: 462; lisp: 196; makefile: 94; yacc: 63; lex: 62; ansic: 26; sed: 16
file content (57 lines) | stat: -rw-r--r-- 2,099 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/perl
# This perl script is used for converting the hopefully-user-readable
# *.in files into the somewhat complicated *.pl scripts
# Copyright (C) 2004, 2005 Kevin P. Scannell <kscanne@gmail.com>
#
# This is free software; see the file COPYING for copying conditions.  There is
# NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
my $attrtags = '['.$ARGV[0].']';
my $noattrtags = '[^BEZ'.$ARGV[0].']';

while (<STDIN>) {
if ( /^[^#]/ ) {
	chomp;
	my ($rule, $action) = m/^([^:]+):(.*)$/;
	my $ans;
	$rule =~ s/\(/(?:/g;
	$rule =~ s/\/>/\\\/>/g;
	$rule =~ s/^/ /;
	$rule =~ s/$/ /;
	# loop over tokens in the rule; the optional <E>,</E> tokens
	# are from the OK lines in rialacha-xx.in
	while ($rule =~ m/(?<= )(<E>)?(<B><Z>(?:(?:<[^>]+>)+|[^<>]+)<\/Z>[^<]+<\/B>|<[^\/][^>]*>[^<]+<\/[^>]+>|[^<> ]+)(<\/E>)?(?= )/g) {
		my $aon = $1;
		my $tok = $2;
		my $tri = $3;
		# e.g. <[^N]>ANYTHING</[^N]>
		if ($tok =~ m/^<\[\^/) {
			$tok =~ s/<\[\^([A-DF-Z]+)\]>([^<]+)<\/[^>]*>/(?:<[^\\\/$1][^>]*>$2<\\\/[^$1]>|<B><Z>(?:<[^$1][^>]*>)+<\\\/Z>$2<\\\/B>)/;
		}
		# e.g. <[AN]>ANYTHING</[AN]>
		elsif ($tok =~ m/^<\[[^\^]/) {
			$tok =~ s/<\[([A-DF-Z]+)\]>([^<]+)<\/[^>]*>/(?:<[$1][^>]*>$2<\\\/[$1]>|<B><Z>(?:<[$1][^>]*>)+<\\\/Z>$2<\\\/B>)/;
		}
		elsif ($tok =~ m/^<[A-Z]/) {
			# e.g. <C>
			$tok =~ s/<($noattrtags)>([^<]+)<\/[A-Z]>/<$1>$2<\\\/$1>/;
			# e.g. <N pl="n" gnt="n">
			$tok =~ s/<($attrtags)( [^>]+)>([^<]+)<\/[A-Z][^>]*>/<${1}${2}>$3<\\\/$1>/;
			# e.g. <N>
			$tok =~ s/<($attrtags)>([^<]+)<\/[A-Z][^>]*>/(?:<${1}[^>]*>$2<\\\/$1>|<B><Z>(?:<${1}[^>]*>)+<\\\/Z>$2<\\\/B>)/;
			# e.g. <NCS>, or (see wa) <NCS h="y">
			$tok =~ s/<($attrtags)([^ >][^>]*)>([^<]+)<\/[A-Z][^>]*>/(?:<${1}[^>]*${2}[^>]*>$3<\\\/$1>|<B><Z>(?:<${1}[^>]*${2}[^>]*>)+<\\\/Z>$3<\\\/B>)/;
		}
		# e.g. barewords or macros with no tags
		else {
			$tok =~ s/([^<> ]+)/(?:<[\\\/A-DF-Z][^>]*>)+$1<\\\/[A-DF-Z]>/;
			$tok =~ s/\+([^(<]*\|[^(<]*)</+(?:$1)</g;
		}
		$tri =~ s/\//\\\//;  # </E> -> <\/E>
		$ans .= "$aon$tok$tri ";
	}
	$ans =~ s/ $/:$action/;
	print "$ans\n";
}
}
exit;