File: apertium-filter-xml

package info (click to toggle)
apertium 3.9.12-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,024 kB
  • sloc: cpp: 22,288; ansic: 4,875; xml: 2,566; python: 1,428; sh: 1,117; lex: 1,088; makefile: 591
file content (39 lines) | stat: -rwxr-xr-x 1,117 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/python3

import argparse
import xml.etree.ElementTree as ET

# Command-line interface.
# The summary text is passed as ``description``: the first positional
# parameter of ArgumentParser is ``prog``, so passing the text positionally
# would replace the program name in the usage line instead of describing it.
parser = argparse.ArgumentParser(description='filter an xml file for language variety specifiers')
parser.add_argument('var', help='language variety to retain (comma-separated for multiple)')
parser.add_argument('infile', help='input file')
parser.add_argument('outfile', help='output file')
# -k may be given several times; each use appends one tag name to args.keep.
parser.add_argument('-k', '--keep', help='node types to retain unchanged', action='append', default=[])
parser.add_argument('-a', '--attr', help='attribute which specifies language variety (default: v)', default='v')
args = parser.parse_args()

# Varieties requested on the command line, split on commas into a set.
keep = set(args.var.split(','))

def should_keep(node):
	"""Tell whether *node* survives filtering, stripping its variety attribute.

	A node without the ``args.attr`` attribute is always kept. Otherwise the
	attribute is removed from the node, and the node is kept only when at
	least one entry of its comma-separated value is in the module-level
	``keep`` set.
	"""
	try:
		# Removing the attribute and reading its value in one step.
		varieties = node.attrib.pop(args.attr)
	except KeyError:
		# No variety restriction on this node.
		return True
	return any(variety in keep for variety in varieties.split(','))

def filter_node(node):
	"""Recursively drop the children of *node* that ``should_keep`` rejects.

	Children whose tag is listed in ``args.keep`` are skipped entirely: they
	are neither tested nor descended into. Accepted children are filtered
	recursively; rejected children are removed from *node*.
	"""
	rejected = []
	for child in node:
		if child.tag in args.keep:
			continue
		if not should_keep(child):
			rejected.append(child)
			continue
		filter_node(child)
	# Removal is deferred so that *node* is not modified while iterating it.
	for child in rejected:
		node.remove(child)

# Load the input document, filter it in place starting at the root element,
# then serialise the result to the output path.
tree = ET.parse(args.infile)
root = tree.getroot()
filter_node(root)
tree.write(args.outfile)