File: strip-empty-lines.py

package info (click to toggle)
apertium-lex-tools 0.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,212 kB
  • sloc: python: 5,788; cpp: 3,086; xml: 395; makefile: 86; awk: 63; sh: 61
file content (40 lines) | stat: -rwxr-xr-x 739 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/python3
# coding=utf-8
# -*- encoding: utf-8 -*-

import sys

#perl "$MOSESDECODER/clean-corpus-n.perl" data.$SL-$TL/$CORPUS.tagged $SL $TL "data.$SL-$TL/$CORPUS.tagged-clean" 1 40


def strip_empty_lines(prefix, sl, tl, outfix):
    """Copy a two-sided corpus, dropping pairs where either side is blank.

    Reads ``prefix.sl`` and ``prefix.tl`` in lockstep and writes the
    surviving pairs to ``outfix.sl`` and ``outfix.tl``, so the two output
    files stay aligned line for line. A line counts as blank when it is
    empty or whitespace-only. If one input is shorter than the other, the
    missing lines are treated as blank, so the surplus lines of the longer
    file are counted but dropped.

    Args:
        prefix: Path prefix shared by both input files.
        sl: Extension of the first input/output file (appended after a dot).
        tl: Extension of the second input/output file (appended after a dot).
        outfix: Path prefix shared by both output files.

    Returns:
        A ``(inlines, outlines)`` tuple: the number of line pairs read and
        the number of line pairs written.

    Raises:
        OSError: If any of the four files cannot be opened.
    """
    inlines = 0
    outlines = 0

    # One ``with`` statement guarantees that all four handles are closed (and
    # the outputs flushed) even if reading or writing fails part-way through.
    # The encoding is pinned to UTF-8 so results do not depend on the locale.
    with open(prefix + '.' + sl, encoding='utf-8') as sl_f, \
            open(prefix + '.' + tl, encoding='utf-8') as tl_f, \
            open(outfix + '.' + sl, 'w', encoding='utf-8') as sl_o, \
            open(outfix + '.' + tl, 'w', encoding='utf-8') as tl_o:
        while True:
            slline = sl_f.readline()
            tlline = tl_f.readline()

            # readline() returns '' only at end of file; stop once BOTH
            # inputs are exhausted.
            if not slline and not tlline:
                break

            inlines += 1
            if not slline.strip() or not tlline.strip():
                continue

            sl_o.write(slline)
            tl_o.write(tlline)
            outlines += 1

    return inlines, outlines


def main():
    """Command-line entry point: ``strip-empty-lines.py PREFIX SL TL OUTFIX``.

    Prints a one-line summary of how many line pairs were read and kept.
    """
    if len(sys.argv) < 5:
        sys.stderr.write('usage: %s PREFIX SL TL OUTFIX\n' % sys.argv[0])
        sys.exit(1)

    # Extra trailing arguments are ignored.
    prefix, sl, tl, outfix = sys.argv[1:5]
    inlines, outlines = strip_empty_lines(prefix, sl, tl, outfix)
    print('in: %d, out: %d' % (inlines, outlines))


if __name__ == '__main__':
    main()