1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
|
#!/usr/bin/python
# coding=utf-8
# -*- encoding: utf-8 -*-
import sys
def processWord(c):
    """Read one lexical unit from stdin and write it to stdout as a token.

    Intended to be called right after the opening '^' of a unit has been
    read (the input looks like the Apertium stream format -- TODO confirm).
    Characters are consumed from ``sys.stdin`` up to and including the
    closing '$'.  The text before the first '<' is treated as the lemma and
    the rest (starting at '<') as the tags.  If a '*' is met while reading
    the lemma, everything up to '$' is kept as part of the lemma.  Spaces in
    the collected text are replaced by '~' and the result is written to
    ``sys.stdout`` followed by a single space.

    Reading stops cleanly at end of input or at a '$' that appears before
    any '<' (a unit without tags); whatever was collected so far is written.

    Args:
        c: Ignored.  Kept for backward compatibility with existing callers;
            it is immediately replaced by the next character from stdin.

    Returns:
        None.
    """
    read = sys.stdin.read
    lemma = []
    tags = []

    c = read(1)
    # Lemma part: stop at the first tag, at the end of the unit, or at EOF
    # (read(1) returns '' at EOF, which would otherwise loop forever).
    while c and c != '<' and c != '$':
        if c == '*':
            # From '*' onwards the whole remainder of the unit is lemma text.
            while c and c != '$':
                lemma.append(c)
                c = read(1)
            break
        lemma.append(c)
        c = read(1)

    # Tag part: only present when the lemma loop stopped on '<'.
    if c == '<':
        while c and c != '$':
            tags.append(c)
            c = read(1)

    sys.stdout.write(
        ''.join(lemma).replace(' ', '~')
        + ''.join(tags).replace(' ', '~')
        + ' '
    )
# Driver: walk stdin one character at a time until read(1) returns ''.
#   '^'  -> hand the lexical unit over to processWord
#   '['  -> skip everything up to the matching ']', echoing only newlines
#   '\n' -> echo the newline
# Any other character is dropped.
c = sys.stdin.read(1)
while c:
    if c == '^':
        # Beginning of a lexical unit
        processWord(c)
    elif c == '[':
        # Bracketed span: discard its content but keep line breaks
        while c and c != ']':
            if c == '\n':
                sys.stdout.write('\n')
            c = sys.stdin.read(1)
    elif c == '\n':
        # Newline is newline
        sys.stdout.write('\n')
    c = sys.stdin.read(1)
|