1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
#!/usr/bin/env python
import sys
import re
# Matches one "Name: value" header line.  Group 1 is everything before the
# first colon; group 2 is the remainder of the line after the colon and a
# single optional space.
header_re = re.compile(r'^([^:]*): ?(.*)$')
class NodePath:
    """A node path together with the header block that described it."""

    def __init__(self, path, headers):
        """Store the node's path and its mapping of header name -> value."""
        self.path = path
        self.headers = headers

    def dump(self):
        """Print the path, then each header as 'name: value' sorted by name.

        The path is indented by three spaces and every header line by six.
        """
        print('   ' + self.path)
        for name in sorted(self.headers):
            print('      ' + name + ': ' + self.headers[name])
def dump_revision(rev, nodepaths):
    """Print the summary of one revision.

    A progress message goes to stderr (opened before the output, finished
    with 'done' after it).  On stdout this prints a 'Revision <rev>' line
    and then calls ``dump()`` on every value of *nodepaths*, visiting the
    keys in sorted order.

    rev       -- the revision identifier, as a string
    nodepaths -- mapping of path -> object providing a ``dump()`` method
    """
    progress_note = '* Normalizing revision ' + rev + '...'
    sys.stderr.write(progress_note)

    title = 'Revision ' + rev
    print(title)

    for key in sorted(nodepaths):
        nodepaths[key].dump()

    sys.stderr.write('done\n')
def parse_header_block(fp):
    """Read one block of 'Name: value' header lines from *fp*.

    Lines are consumed until a blank line or end of file is reached.

    *fp* may yield either text or bytes lines.  A file opened in binary
    mode returns bytes from ``readline()`` on Python 3; such lines never
    compare equal to '' and cannot be matched by the str pattern in
    ``header_re``, so they are decoded (UTF-8, undecodable bytes replaced)
    before matching.

    Returns a ``(headers, eof)`` tuple:
      headers -- dict mapping each header name to its value (str)
      eof     -- 1 if end of file stopped the block, 0 if a blank line did

    Raises Exception if a non-blank line does not match ``header_re``.
    """
    headers = {}
    while True:
        line = fp.readline()
        # readline() signals EOF with an empty str *or* empty bytes object;
        # truthiness covers both, unlike a comparison against ''.
        if not line:
            return headers, 1
        # Strip before decoding so that only the whitespace the underlying
        # type considers whitespace is removed.
        line = line.strip()
        if not line:
            return headers, 0
        # On Python 2 ``bytes`` is ``str`` and needs no decoding; only a
        # genuine Python 3 bytes object is converted here.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8', 'replace')
        matches = header_re.match(line)
        if matches is None:
            raise Exception('Malformed header block')
        headers[matches.group(1)] = matches.group(2)
def parse_file(fp):
    """Walk the header blocks of *fp*, dumping each revision once complete.

    Header blocks are read with ``parse_header_block`` until it reports end
    of file.  A block carrying 'Revision-number' starts a new revision
    (after dumping the previous one, if any); a block carrying 'Node-path'
    is recorded as a ``NodePath`` of the current revision.  The content
    that follows either kind of block is read from *fp* and discarded.

    Raises Exception when a non-empty block that is neither a revision nor
    a node shows up after the first revision has been seen.
    """
    revision = None
    seen_nodes = {}
    at_eof = False

    while not at_eof:
        # Fetch the next block of headers.
        block, at_eof = parse_header_block(fp)

        if 'Revision-number' in block:
            # A new revision begins: flush the one collected so far.
            if revision:
                dump_revision(revision, seen_nodes)

            # Start collecting afresh for this revision.
            revision = block['Revision-number']
            seen_nodes = {}

            # Step over the revision's property content.
            skip = block.get('Prop-content-length', 0)
            fp.read(int(skip))

        elif 'Node-path' in block:
            # Record the node under its path.
            node_path = block['Node-path']
            seen_nodes[node_path] = NodePath(node_path, block)

            # Step over the node's text and property content.
            text_size = block.get('Text-content-length', 0)
            prop_size = block.get('Prop-content-length', 0)
            fp.read(int(text_size) + int(prop_size))

        elif revision and block:
            # Neither revision nor node, yet not empty, and a revision has
            # already been seen: the input is in an unexpected state.
            raise Exception('Header block from outta nowhere')

    # End of input: flush the last revision, if there was one.
    if revision:
        dump_revision(revision, seen_nodes)
def usage():
    """Print the command-line help to stdout, then exit with status 0."""
    help_lines = (
        'Usage: ' + sys.argv[0] + ' [DUMPFILE]',
        '',
        'Reads a Subversion dumpfile from DUMPFILE (or, if not provided,',
        'from stdin) and normalizes the metadata contained therein,',
        'printing summarized and sorted information. This is useful for',
        'generating data about dumpfiles in a diffable fashion.',
    )
    for text in help_lines:
        print(text)
    sys.exit(0)
def main():
    """Script entry point: normalize the dumpfile named on the command line.

    With a first argument of '--help', prints the usage text and exits.
    With any other first argument, that path is opened in binary mode and
    parsed; the file is closed when parsing finishes or raises.  With no
    argument, standard input is parsed instead.
    """
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()  # exits the process; does not return
        # Use a context manager so the handle is released even if parsing
        # raises part-way through the file.
        with open(sys.argv[1], 'rb') as fp:
            parse_file(fp)
    else:
        # NOTE(review): sys.stdin is a text stream, whereas a named file is
        # read as bytes -- confirm the content lengths skipped by
        # parse_file are meant to be counted the same way for both.
        parse_file(sys.stdin)
# Run the normalizer only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|