File: normalize-dump.py

package info (click to toggle)
subversion 1.6.12dfsg-7
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 48,292 kB
  • ctags: 47,714
  • sloc: ansic: 578,414; python: 77,551; sh: 13,100; ruby: 12,194; cpp: 10,097; java: 8,428; lisp: 7,702; perl: 7,320; makefile: 1,035; xml: 759; sql: 62
file content (117 lines) | stat: -rwxr-xr-x 3,187 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python

import sys
import re

# Matches a single "Name: value" header line.  Group 1 is everything before
# the first colon; group 2 is the remainder after one optional space.
header_re = re.compile(r'^([^:]*): ?(.*)$')

class NodePath:
    """A node path together with the header fields that were read for it."""

    def __init__(self, path, headers):
        # Both arguments are stored as given; no copy is made.
        self.headers = headers
        self.path = path

    def dump(self):
        """Print the path, then every header as 'name: value', sorted by name.

        The path line is indented by three spaces and each header line by
        six.
        """
        path_indent = ' ' * 3
        header_indent = ' ' * 6
        print(path_indent + self.path)
        for name in sorted(self.headers):
            print(header_indent + name + ': ' + self.headers[name])


def dump_revision(rev, nodepaths):
    """Print the summary for one revision.

    rev is the revision identifier as a string.  nodepaths maps each path
    to an object providing a dump() method; the objects are dumped in
    sorted order of their paths.  A progress message is written to stderr
    around the work, while the summary itself goes to stdout.
    """
    sys.stderr.write('* Normalizing revision ' + rev + '...')
    print('Revision ' + rev)
    for key in sorted(nodepaths):
        nodepaths[key].dump()
    sys.stderr.write('done\n')



def parse_header_block(fp):
    """Read one block of 'Name: value' header lines from fp.

    Lines are consumed until a blank line or end of file is reached.
    fp may yield either text or bytes lines; bytes are decoded as UTF-8
    (undecodable bytes are replaced) before being parsed.

    Returns a (headers, eof) tuple.  headers is a dict mapping each header
    name to its value.  eof is 1 when the block was ended by end of file
    and 0 when it was ended by a blank line.

    Raises Exception if a non-blank line does not match header_re.
    """
    headers = {}
    while True:
        line = fp.readline()
        # readline() returns an empty str/bytes object only at end of file.
        if not line:
            return headers, 1
        # main() opens dumpfiles in binary mode, so under Python 3 the line
        # is bytes: it never compares equal to '' and cannot be matched by
        # the str pattern in header_re.  Decode it to text first.  (Under
        # Python 2, bytes is str and the line is left untouched.)
        if bytes is not str and isinstance(line, bytes):
            line = line.decode('utf-8', 'replace')
        line = line.strip()
        # A blank line terminates the header block.
        if not line:
            return headers, 0
        matches = header_re.match(line)
        if not matches:
            raise Exception('Malformed header block')
        headers[matches.group(1)] = matches.group(2)


def _skip_content(fp, length):
    """Read and discard length units from fp in bounded chunks.

    Reading piecewise avoids holding an entire (possibly very large)
    content section in memory just to throw it away.  A non-positive
    length skips nothing, and a short read (end of input) stops early.
    """
    remaining = length
    while remaining > 0:
        chunk = fp.read(min(remaining, 65536))
        if not chunk:
            break
        remaining -= len(chunk)


def parse_file(fp):
    """Read a dumpfile from fp and print a normalized summary per revision.

    Header blocks are read one at a time with parse_header_block().  A
    block containing 'Revision-number' starts a new revision (the previous
    one, if any, is passed to dump_revision() first).  A block containing
    'Node-path' is recorded as a NodePath for the current revision, keyed
    by its path.  The content following either kind of block is skipped
    using the lengths given in its headers.

    Raises Exception if a non-empty header block that is neither a
    revision nor a node block appears after the first revision.
    """
    nodepaths = {}
    current_rev = None

    while True:
        # Parse a block of headers
        headers, eof = parse_header_block(fp)

        # This is a revision header block
        if 'Revision-number' in headers:

            # If there was a previous revision, dump it
            if current_rev is not None:
                dump_revision(current_rev, nodepaths)

            # Reset the data for this revision
            current_rev = headers['Revision-number']
            nodepaths = {}

            # Skip the contents
            prop_len = int(headers.get('Prop-content-length', 0))
            _skip_content(fp, prop_len)

        # This is a node header block
        elif 'Node-path' in headers:

            # Make a new NodePath object, and add it to the
            # dictionary thereof
            path = headers['Node-path']
            nodepaths[path] = NodePath(path, headers)

            # Skip the content
            text_len = int(headers.get('Text-content-length', 0))
            prop_len = int(headers.get('Prop-content-length', 0))
            _skip_content(fp, text_len + prop_len)

        # Not a revision, not a node -- if we've already seen at least
        # one revision block, we are in an errorful state.
        elif current_rev is not None and headers:
            raise Exception('Header block from outta nowhere')

        if eof:
            # Flush the revision that was still being collected.
            if current_rev is not None:
                dump_revision(current_rev, nodepaths)
            break

def usage():
    """Print the command-line help text to stdout and exit with status 0."""
    help_lines = [
        'Usage: ' + sys.argv[0] + ' [DUMPFILE]',
        '',
        'Reads a Subversion dumpfile from DUMPFILE (or, if not provided,',
        'from stdin) and normalizes the metadata contained therein,',
        'printing summarized and sorted information.  This is useful for',
        'generating data about dumpfiles in a diffable fashion.',
    ]
    for text in help_lines:
        print(text)
    sys.exit(0)

def main():
    """Command-line entry point.

    With '--help' as the first argument, prints the usage text and exits.
    With any other first argument, treats it as the path of a dumpfile,
    opens it in binary mode and parses it.  With no arguments, parses
    sys.stdin instead.
    """
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()
        fp = open(sys.argv[1], 'rb')
        # Make sure the dumpfile is closed even if parsing raises.
        try:
            parse_file(fp)
        finally:
            fp.close()
    else:
        parse_file(sys.stdin)


if __name__ == '__main__':
    main()