1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2023 Jonathan Poelen <jonathan.poelen@gmail.com>
# SPDX-License-Identifier: MIT
from typing import Iterable, Mapping
from xml.etree.ElementTree import ElementTree
import sys
def print_usage_and_exit():
    """Write the command-line usage text to stderr and exit with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    print(sys.argv[0], '[-p] syntax.xml...\n -p show duplicate content', file=sys.stderr)
    # sys.exit() is used instead of the builtin exit(): the latter is added by
    # the site module for interactive sessions and is not guaranteed to exist
    # (for example under `python -S`).
    sys.exit(1)
def normalize_bool_or_remove_if_false(d: dict[str, str], key: str) -> None:
    """Canonicalize a boolean-like entry of *d* in place.

    If *key* is absent (or maps to None) nothing happens. A value of '1' or
    any capitalization of 'true' is rewritten to '1'; every other value is
    treated as false and the entry is removed from *d*.
    """
    raw = d.get(key)
    if raw is None:
        return

    is_true = raw == '1' or raw.lower() == 'true'
    if is_true:
        d[key] = '1'
    else:
        del d[key]
def remove_if_stay(d: dict[str, str], key: str) -> None:
    """Drop *key* from *d* when its value is empty or the literal '#stay'.

    Entries that are absent (or map to None) and entries holding any other
    value are left untouched. The dictionary is modified in place.
    """
    current = d.get(key)
    if current is None:
        return

    is_noop = not current or current == '#stay'
    if is_noop:
        del d[key]
# Command-line handling: at least one argument is required, and -h/--help
# as the first argument prints the usage text.
if len(sys.argv) < 2 or sys.argv[1] in {'-h', '--help'}:
    print_usage_and_exit()

# An optional leading -p enables printing of the shared content. It shifts
# the index of the first file name and must be followed by at least one file.
show_content = sys.argv[1] == '-p'
iarg = 2 if show_content else 1
if show_content and len(sys.argv) < 3:
    print_usage_and_exit()

for filename in sys.argv[iarg:]:
    tree = ElementTree()
    tree.parse(filename)

    # Maps a normalized signature string to the names of all contexts that
    # produced that signature.
    identical_contexts = {}

    # The "contexts" element is looked up under the first child of the root.
    for context in tree.getroot()[0].find("contexts"):
        child_count = len(context)
        only_include = child_count == 1 and context[0].tag == 'IncludeRules'

        if child_count == 0 or only_include:
            # Context with no rule, or with a single IncludeRules: the
            # signature is built from the context's own attributes (minus its
            # name), merged with the IncludeRules attributes when present.
            attrib = context.attrib
            name = attrib.pop('name')
            for attr_name in ('lineEndContext', 'lineEmptyContext', 'fallthroughContext'):
                remove_if_stay(attrib, attr_name)
            if child_count:
                attrib.update(context[0].attrib)
                normalize_bool_or_remove_if_false(attrib, 'includeAttrib')
            signature = '\x01'.join(sorted(f'{k}={v}' for k, v in attrib.items()))
        else:
            # Any other context: the signature is the sorted set of its direct
            # child rules (tag plus normalized attributes). Rule order and
            # repeated rules do not affect the signature, and the context's
            # own attributes are not part of it.
            rules = set()
            for rule in context:
                attrib = rule.attrib
                remove_if_stay(attrib, 'context')
                for attr_name in ('dynamic', 'minimal', 'includeAttrib', 'firstNonSpace', 'lookAhead'):
                    normalize_bool_or_remove_if_false(attrib, attr_name)
                s = '\x01'.join(f'{k}={v}' for k, v in sorted(attrib.items()))
                rules.add(f'{rule.tag}\x01{s}')
            signature = '\n'.join(sorted(rules))
            name = context.attrib['name']

        identical_contexts.setdefault(signature, []).append(name)

    # Report every signature shared by more than one context.
    for content, names in identical_contexts.items():
        if len(names) <= 1:
            continue
        print(f'{filename}: {names}')
        if show_content:
            # \x01 separates fields and \n separates rules in the signature.
            print(' ', content.replace('\x01', ' ').replace('\n', '\n '))
|