File: lambdas-to-rules.py

package info (click to toggle)
apertium-lex-tools 0.1.1~r66150-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 13,992 kB
  • sloc: cpp: 5,849; python: 5,145; xml: 317; makefile: 128; awk: 63; sh: 43
file content (78 lines) | stat: -rw-r--r-- 1,699 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import sys;
import common;

def wrap(x):
	"""Return the string x enclosed between a leading '^' and a trailing '$'."""
	return ''.join(('^', x, '$'))

# Lookup tables filled from the first input file (sys.argv[1]).
sl_tl_defaults = {}  # source word -> translation from the latest line containing '@'
sl_tl = {}  # source word -> list of its translations, in file order
indexes = {}  # (source word, translation) -> index of that translation
trad_counter = {}  # source word -> number of translations recorded so far
rindex = {}  # (source word, index) -> translation

with open(sys.argv[1]) as d:
	for line in d:
		# Lines obtained by iterating a file keep their trailing newline, so
		# a length test never detects a blank line; test the stripped text.
		if not line.strip():
			continue

		row = common.tokenise_tagger_line(line)
		sl = wrap(row[0].strip())
		tl = wrap(row[1].strip())
		# When the wrapped translation starts with '*', drop the last two
		# characters that precede the closing '$'.
		if tl[1] == '*':
			tl = tl[:-3] + '$'

		# A line containing '@' sets the default translation for this word.
		if '@' in line:
			sl_tl_defaults[sl] = tl

		# Translations of one source word are numbered 0, 1, 2, ... in the
		# order in which they appear in the file.
		position = trad_counter.get(sl, 0)
		sl_tl.setdefault(sl, []).append(tl)
		indexes[(sl, tl)] = position
		rindex[(sl, position)] = tl
		trad_counter[sl] = position + 1

# Debug aid: write every (source word, index) -> translation entry to stderr.
for (source_word, position), translation in rindex.items():
	print(source_word, position, translation, file=sys.stderr)

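# Sample lines of the second input file (sys.argv[2]). Fields are separated
# by ' \t ': source word, weight, translation index, context.
#ability<n> 	 0.25652 	 1 	 ability<n> to<pr>
#ability<n> 	 1.54548 	 0 	 ability<n> to<pr> deliver<vblex><inf>
#ability<n> 	 1.48162 	 0 	 our<det><pos> ability<n> to<pr>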

# Second pass: turn each line of the file given as sys.argv[2] into one
# tab-separated rule line on stdout. Each input line is split on ' \t ' into
# source word, weight, translation index and context.
with open(sys.argv[2]) as d:
	for line in d:
		# Iterating a file keeps the trailing newline; a blank line (for
		# example at the end of the file) has no fields and is skipped
		# instead of failing on row[1].
		if not line.strip():
			continue

		row = line.split(' \t ')
		slword = row[0].strip()
		# Parsed only so that a malformed weight fails here; the original
		# text of the field is what gets printed below.
		float(row[1])
		tlid = int(row[2])
		# NOTE(review): rindex keys are built from '^...$'-wrapped words, so
		# slword has to arrive in the same wrapped form for this lookup to
		# succeed -- confirm against the format of this input file.
		if (slword, tlid) not in rindex:
			print ('(', slword, ',', tlid, ') not in index', file=sys.stderr)
			continue

		tlword = rindex[(slword, tlid)]
		context = row[3].strip()

		# Notes kept from the original author (presumably rule-line samples;
		# verify before relying on them):
		#	#+ 0.571428571429 14 8 8 	troiñ<vblex>		tourner<vblex>	8
		#	+nature<n>	service<n> nature<n>	carácter<n>	3
		print('+ ' + '\t'.join((row[1], slword, context, tlword, '1')))