File: normalize-dump.py

package info (click to toggle)
subversion 1.4.2dfsg1-3
  • links: PTS
  • area: main
  • in suites: etch
  • size: 37,284 kB
  • ctags: 32,888
  • sloc: ansic: 406,472; python: 38,378; sh: 15,438; cpp: 9,604; ruby: 8,313; perl: 5,308; java: 4,576; lisp: 3,860; xml: 3,298; makefile: 856
file content (120 lines) | stat: -rwxr-xr-x 3,263 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/python

import sys
import re
import string

# Matches one "Name: value" header line.  The name group excludes colons so
# the split happens at the FIRST colon; a greedy '.*' would split at the
# last one and mangle any value that itself contains a colon
# (e.g. "Node-path: dir/a: b").
header_re = re.compile(r'^([^:]*): ?(.*)$')

class NodePath:
    """A node path together with the header fields recorded for it."""

    def __init__(self, path, headers):
        # `headers` is a dict of header name -> value; dump() concatenates
        # both as strings.
        self.headers = headers
        self.path = path

    def dump(self):
        """Print the path, then each header as "name: value" sorted by name.

        The path is indented by three spaces and each header by six.
        """
        print('   ' + self.path)
        for name in sorted(self.headers):
            print('      ' + name + ': ' + self.headers[name])


def dump_revision(rev, nodepaths):
    """Print the summary of one revision to stdout.

    rev is the revision number as a string.  nodepaths is a dict mapping
    each path to an object with a dump() method; the objects are dumped
    in sorted path order.  A progress message is written to stderr before
    and after the work.
    """
    sys.stderr.write('* Normalizing revision ' + rev + '...')
    print('Revision ' + rev)
    for key in sorted(nodepaths):
        nodepaths[key].dump()
    sys.stderr.write('done\n')
    
        

def parse_header_block(fp):
    """Read header lines from fp up to the next blank line or end of file.

    Returns a (headers, eof) pair.  headers is a dict of the fields read
    so far; eof is 1 when the end of the file was reached and 0 when the
    block was ended by a blank line.

    Raises Exception if a non-blank line does not match header_re.
    """
    fields = {}
    while True:
        raw = fp.readline()
        if not raw:
            # readline() returned nothing at all: end of file.
            return fields, 1
        stripped = raw.strip()
        if not stripped:
            # Nothing left after stripping whitespace: end of this block.
            return fields, 0
        parsed = header_re.match(stripped)
        if parsed is None:
            raise Exception('Malformed header block')
        fields[parsed.group(1)] = parsed.group(2)

        
def parse_file(fp):
    """Read header blocks from fp and dump each revision once it is complete.

    A block containing 'Revision-number' starts a new revision; a block
    containing 'Node-path' adds a NodePath to the current revision.  The
    content following either kind of block is skipped using the lengths
    given in its headers.  A revision is dumped when the next one starts
    or when the end of the file is reached.

    Raises Exception if, after a revision has been seen, a non-empty
    header block is neither a revision nor a node block.
    """
    pending_nodes = {}
    rev = None

    while True:
        block, at_eof = parse_header_block(fp)

        if 'Revision-number' in block:
            # A new revision begins: flush the one collected so far.
            if rev:
                dump_revision(rev, pending_nodes)

            rev = block['Revision-number']
            pending_nodes = {}

            # Skip over the revision's property content.
            fp.read(int(block.get('Prop-content-length', 0)))

        elif 'Node-path' in block:
            # Record the node under its path for the current revision.
            node_path = block['Node-path']
            pending_nodes[node_path] = NodePath(node_path, block)

            # Skip over the node's text and property content.
            text_size = int(block.get('Text-content-length', 0))
            prop_size = int(block.get('Prop-content-length', 0))
            fp.read(text_size + prop_size)

        elif rev and block:
            # Neither a revision nor a node, yet a revision was already
            # seen: the input is not in a state we understand.
            raise Exception('Header block from outta nowhere')

        if at_eof:
            break

    # The loop only ends at end of file; flush the last revision, if any.
    if rev:
        dump_revision(rev, pending_nodes)

def usage():
    """Print the command-line help text to stdout and exit with status 0."""
    help_lines = [
        'Usage: ' + sys.argv[0] + ' [DUMPFILE]',
        '',
        'Reads a Subversion dumpfile from DUMPFILE (or, if not provided,',
        'from stdin) and normalizes the metadata contained therein,',
        'printing summarized and sorted information.  This is useful for',
        'generating data about dumpfiles in a diffable fashion.',
    ]
    for text in help_lines:
        print(text)
    sys.exit(0)
    
def main():
    """Entry point: process the file named by the first argument, or stdin.

    If the first argument is '--help', usage() is called instead (it
    prints the help text and exits).  A file opened here is closed again
    once parsing finishes or fails; stdin is left open.
    """
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()
        fp = open(sys.argv[1], 'rb')
        try:
            parse_file(fp)
        finally:
            # Close even when parse_file raises, so the handle is not leaked.
            fp.close()
    else:
        parse_file(sys.stdin)

    
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()