File: normalize-dump.py

package info (click to toggle)
subversion 1.14.5-5
links: PTS, VCS
area: main
in suites: forky, sid
size: 80,512 kB
sloc: ansic: 1,039,361; python: 140,270; cpp: 24,862; java: 24,547; ruby: 12,312; lisp: 7,619; sh: 7,414; perl: 7,010; sql: 1,686; makefile: 1,191; xml: 577
file content (137 lines) | stat: -rwxr-xr-x 3,986 bytes
parent folder | download | duplicates (10)
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#

import sys
import re

header_re = re.compile(r'^([^:]*): ?(.*)$')

class NodePath:
    def __init__(self, path, headers):
        self.path = path
        self.headers = headers

    def dump(self):
        print((' ' * 3) + self.path)
        headers = sorted(self.headers.keys())
        for header in headers:
            print((' ' * 6) + header + ': ' + self.headers[header])


def dump_revision(rev, nodepaths):
    sys.stderr.write('* Normalizing revision ' + rev + '...')
    print('Revision ' + rev)
    paths = sorted(nodepaths.keys())
    for path in paths:
        nodepath = nodepaths[path]
        nodepath.dump()
    sys.stderr.write('done\n')



def parse_header_block(fp):
    headers = {}
    while True:
        line = fp.readline()
        if line == '':
            return headers, 1
        line = line.strip()
        if line == '':
            return headers, 0
        matches = header_re.match(line)
        if not matches:
            raise Exception('Malformed header block')
        headers[matches.group(1)] = matches.group(2)


def parse_file(fp):
    nodepaths = {}
    current_rev = None

    while True:
        # Parse a block of headers
        headers, eof = parse_header_block(fp)

        # This is a revision header block
        if 'Revision-number' in headers:

            # If there was a previous revision, dump it
            if current_rev:
                dump_revision(current_rev, nodepaths)

            # Reset the data for this revision
            current_rev = headers['Revision-number']
            nodepaths = {}

            # Skip the contents
            prop_len = headers.get('Prop-content-length', 0)
            fp.read(int(prop_len))

        # This is a node header block
        elif 'Node-path' in headers:

            # Make a new NodePath object, and add it to the
            # dictionary thereof
            path = headers['Node-path']
            node = NodePath(path, headers)
            nodepaths[path] = node

            # Skip the content
            text_len = headers.get('Text-content-length', 0)
            prop_len = headers.get('Prop-content-length', 0)
            fp.read(int(text_len) + int(prop_len))

        # Not a revision, not a node -- if we've already seen at least
        # one revision block, we are in an errorful state.
        elif current_rev and len(headers.keys()):
            raise Exception('Header block from outta nowhere')

        if eof:
            if current_rev:
                dump_revision(current_rev, nodepaths)
            break

def usage():
    print('Usage: ' + sys.argv[0] + ' [DUMPFILE]')
    print('')
    print('Reads a Subversion dumpfile from DUMPFILE (or, if not provided,')
    print('from stdin) and normalizes the metadata contained therein,')
    print('printing summarized and sorted information.  This is useful for')
    print('generating data about dumpfiles in a diffable fashion.')
    sys.exit(0)

def main():
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()
        fp = open(sys.argv[1], 'rb')
    else:
        fp = sys.stdin
    parse_file(fp)


if __name__ == '__main__':
    main()