File: apertium-filter-rules

package info (click to toggle)
apertium 3.9.12-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,024 kB
  • sloc: cpp: 22,288; ansic: 4,875; xml: 2,566; python: 1,428; sh: 1,117; lex: 1,088; makefile: 591
file content (58 lines) | stat: -rwxr-xr-x 1,803 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/python3
#
# Copyright (C) 2024 Apertium
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#

import argparse
import sys
import xml.etree.ElementTree as ET

# Command-line interface: one or more language codes, then the input file,
# then an optional output file.
#
# The text is passed as `description=`; the first positional parameter of
# ArgumentParser is `prog`, which would put the whole sentence into the
# "usage:" line in place of the program name.
#
# NOTE(review): 'lang' is greedy (nargs='+') and 'outfile' is optional, so
# argparse appears to hand every leading argument to 'lang', the last one to
# 'infile', and never fill 'outfile' positionally -- confirm before relying
# on the third positional.
parser = argparse.ArgumentParser(
	description='Filter out language-specific rules from Apertium XML files')
parser.add_argument('lang', nargs='+', action='store',
	help='language code(s); an element carrying a "v" attribute is kept '
	     'only if that attribute lists every code given here')
parser.add_argument('infile', action='store',
	help='input XML file, or "-" for standard input')
parser.add_argument('outfile', action='store', default='-', nargs='?',
	help='output XML file, or "-" for standard output (default: "-")')
args = parser.parse_args()

# Language codes requested on the command line, as a set for subset tests.
Langs = set(args.lang)

# Children with these tags are passed over by process_node: they are neither
# filtered nor descended into, and any "v" attribute they carry is left alone.
# NOTE(review): presumably "v" on these elements is not a language list --
# confirm against the Apertium rule-file format.
SKIP_TAGS = [
	'with-param',
	'param',
	'list-item',
	'lit',
	'lit-tag',
	'attr-item',
	'def-var',
]

def process_node(node):
	"""Recursively filter the children of *node* in place by their "v" attribute.

	For each direct child:

	* a child whose tag is in SKIP_TAGS is left completely untouched
	  (its attributes are kept and it is not descended into);
	* a child with a "v" attribute has that attribute stripped; it is kept
	  and processed recursively only if every language in Langs appears in
	  the whitespace-separated attribute value, otherwise it is removed;
	* any other child is processed recursively.

	Returns None; the tree is modified directly.
	"""
	rejected = []
	for child in node:
		if child.tag in SKIP_TAGS:
			continue
		# pop() both reads and strips the attribute in one step.
		variants = child.attrib.pop('v', None)
		if variants is not None and not Langs.issubset(variants.split()):
			rejected.append(child)
			continue
		process_node(child)
	# Removal is deferred so the child list is not mutated while iterating it.
	for child in rejected:
		node.remove(child)

# '-' selects the standard streams. Both are used in binary mode: on input
# the XML parser then decodes the bytes itself (honoring the document's own
# encoding declaration instead of the locale), and on output the UTF-8 bytes
# produced by tree.write() are passed through unchanged.
infile = sys.stdin.buffer if args.infile == '-' else args.infile
outfile = sys.stdout.buffer if args.outfile == '-' else args.outfile

# insert_comments=True makes the tree builder keep XML comments as nodes so
# they are written back out. A separate name is used so the argparse
# `parser` object is not rebound.
xml_parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
tree = ET.parse(infile, parser=xml_parser)
# Filter the whole document in place, starting from the root element.
process_node(tree.getroot())
tree.write(outfile, encoding='utf-8')