File: process-biltrans-output.py

package info (click to toggle)
apertium-lex-tools 0.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,212 kB
  • sloc: python: 5,788; cpp: 3,086; xml: 395; makefile: 86; awk: 63; sh: 61
file content (38 lines) | stat: -rwxr-xr-x 747 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/python
# coding=utf-8
# -*- encoding: utf-8 -*-

import sys

def processWord(c):
	"""Copy one lexical unit from stdin to stdout as a single token.

	Called right after the opening '^' has been consumed.  Characters are
	read from sys.stdin up to (and including) the closing '$'; the '$'
	itself is not echoed.  Every space inside the unit is replaced by '~'
	so the unit stays one whitespace-delimited token, and a single space
	is written after it as a separator.

	Lemma text, '<...>' tags and '*'-marked forms are all copied through
	unchanged, so no distinction between them is needed here.

	If stdin ends before a '$' is seen, whatever was read so far is
	written out and the function returns instead of looping forever.

	c -- ignored; kept only so existing callers that pass the current
	     character keep working.  Reading always restarts from stdin.
	"""
	unit = []
	ch = sys.stdin.read(1)
	# read(1) returns '' at end of input, so test for that as well as for
	# the '$' terminator; otherwise a truncated unit would never finish.
	while ch and ch != '$':
		unit.append(ch)
		ch = sys.stdin.read(1)
	sys.stdout.write(''.join(unit).replace(' ', '~') + ' ')

# Driver: walk stdin one character at a time until read() returns ''.
#   '^'      starts a lexical unit, which processWord() consumes and prints
#   '[...]'  is skipped, but newlines inside it are still echoed
#   '\n'     is echoed as-is
# Any other character is dropped.
def _read_char():
	return sys.stdin.read(1)

for ch in iter(_read_char, ''):
	if ch == '^':
		processWord(ch)
	elif ch == '[':
		# Discard everything up to the closing ']' (or end of input),
		# keeping only the line breaks so output lines stay aligned.
		for skipped in iter(_read_char, ''):
			if skipped == ']':
				break
			if skipped == '\n':
				sys.stdout.write('\n')
	elif ch == '\n':
		sys.stdout.write('\n')