1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
def iter_clean_lines(lines):
lines = iter(lines)
for line in lines:
line = line.strip()
if line.startswith('# XXX'):
continue
yield line
def parse_table_lines(lines):
lines = iter_clean_lines(lines)
for line in lines:
if line.startswith(('####', '#----')):
kind = 0 if line[1] == '#' else 1
try:
line = next(lines).strip()
except StopIteration:
line = ''
if not line.startswith('# '):
raise NotImplementedError(line)
yield kind, line[2:].lstrip()
continue
maybe = None
while line.startswith('#'):
if line != '#' and line[1] == ' ':
maybe = line[2:].lstrip()
try:
line = next(lines).strip()
except StopIteration:
return
if not line:
break
else:
if line:
if maybe:
yield 2, maybe
yield 'row', line
def iter_sections(lines):
header = None
section = []
for kind, value in parse_table_lines(lines):
if kind == 'row':
if not section:
if header is None:
header = value
continue
raise NotImplementedError(value)
yield tuple(section), value
else:
if header is None:
header = False
section[kind:] = [value]
def collect_sections(lines):
sections = {}
for section, row in iter_sections(lines):
if section not in sections:
sections[section] = [row]
else:
sections[section].append(row)
return sections
def collate_sections(lines):
collated = {}
for section, rows in collect_sections(lines).items():
parent = collated
current = ()
for name in section:
current += (name,)
try:
child, secrows, totalrows = parent[name]
except KeyError:
child = {}
secrows = []
totalrows = []
parent[name] = (child, secrows, totalrows)
parent = child
if current == section:
secrows.extend(rows)
totalrows.extend(rows)
return collated
#############################
# the commands
def cmd_count_by_section(lines):
div = ' ' + '-' * 50
total = 0
def render_tree(root, depth=0):
nonlocal total
indent = ' ' * depth
for name, data in root.items():
subroot, rows, totalrows = data
sectotal = f'({len(totalrows)})' if totalrows != rows else ''
count = len(rows) if rows else ''
if depth == 0:
yield div
yield f'{sectotal:>7} {count:>4} {indent}{name}'
yield from render_tree(subroot, depth+1)
total += len(rows)
sections = collate_sections(lines)
yield from render_tree(sections)
yield div
yield f'(total: {total})'
#############################
# the script
def parse_args(argv=None, prog=None):
import argparse
parser = argparse.ArgumentParser(prog=prog)
parser.add_argument('filename')
args = parser.parse_args(argv)
ns = vars(args)
return ns
def main(filename):
with open(filename) as infile:
for line in cmd_count_by_section(infile):
print(line)
if __name__ == '__main__':
kwargs = parse_args()
main(**kwargs)
|