File: generate-churning-bundle.py

package info (click to toggle)
mercurial 6.3.2-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 42,052 kB
  • sloc: python: 199,820; ansic: 46,300; tcl: 3,715; sh: 1,676; lisp: 1,483; cpp: 864; javascript: 649; makefile: 626; xml: 36; sql: 30
file content (142 lines) | stat: -rwxr-xr-x 4,407 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
#
# generate-branchy-bundle - generate a branch for a "large" branchy repository
#
# Copyright 2018 Octobus, contact@octobus.net
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
#
# This script generates a repository suitable for testing delta computation
# strategies.
#
# The repository update a single "large" file with many updates. One fixed part
# of the files always get updated while the rest of the lines get updated over
# time. This update happens over many topological branches, some getting merged
# back.
#
# Running with `chg` in your path and `CHGHG` set is recommended for speed.


import hashlib
import os
import shutil
import subprocess
import sys
import tempfile

BUNDLE_NAME = 'big-file-churn.hg'

# constants for generating the repository
NB_CHANGESET = 5000
PERIOD_MERGING = 8
PERIOD_BRANCHING = 7
MOVE_BACK_MIN = 3
MOVE_BACK_RANGE = 5

# constants for generating the large file we keep updating
#
# At each revision, the beginning on the file change,
# and set of other lines changes too.
FILENAME = 'SPARSE-REVLOG-TEST-FILE'
NB_LINES = 10500
ALWAYS_CHANGE_LINES = 500
OTHER_CHANGES = 300


def nextcontent(previous_content):
    """utility to produce a new file content from the previous one"""
    return hashlib.md5(previous_content).hexdigest()


def filecontent(iteridx, oldcontent):
    """generate a new file content

    The content is generated according the iteration index and previous
    content"""

    # initial call
    if iteridx is None:
        current = ''
    else:
        current = str(iteridx)

    for idx in range(NB_LINES):
        do_change_line = True
        if oldcontent is not None and ALWAYS_CHANGE_LINES < idx:
            do_change_line = not ((idx - iteridx) % OTHER_CHANGES)

        if do_change_line:
            to_write = current + '\n'
            current = nextcontent(current)
        else:
            to_write = oldcontent[idx]
        yield to_write


def updatefile(filename, idx):
    """update <filename> to be at appropriate content for iteration <idx>"""
    existing = None
    if idx is not None:
        with open(filename, 'rb') as old:
            existing = old.readlines()
    with open(filename, 'wb') as target:
        for line in filecontent(idx, existing):
            target.write(line)


def hg(command, *args):
    """call a mercurial command with appropriate config and argument"""
    env = os.environ.copy()
    if 'CHGHG' in env:
        full_cmd = ['chg']
    else:
        full_cmd = ['hg']
    full_cmd.append('--quiet')
    full_cmd.append(command)
    if command == 'commit':
        # reproducible commit metadata
        full_cmd.extend(['--date', '0 0', '--user', 'test'])
    elif command == 'merge':
        # avoid conflicts by picking the local variant
        full_cmd.extend(['--tool', ':merge-local'])
    full_cmd.extend(args)
    env['HGRCPATH'] = ''
    return subprocess.check_call(full_cmd, env=env)


def run(target):
    tmpdir = tempfile.mkdtemp(prefix='tmp-hg-test-big-file-bundle-')
    try:
        os.chdir(tmpdir)
        hg('init')
        updatefile(FILENAME, None)
        hg('commit', '--addremove', '--message', 'initial commit')
        for idx in range(1, NB_CHANGESET + 1):
            if sys.stdout.isatty():
                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
            if (idx % PERIOD_BRANCHING) == 0:
                move_back = MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE)
                hg('update', ".~%d" % move_back)
            if (idx % PERIOD_MERGING) == 0:
                hg('merge', 'min(head())')
            updatefile(FILENAME, idx)
            hg('commit', '--message', 'commit #%d' % idx)
        hg('bundle', '--all', target, '--config', 'devel.bundle.delta=p1')
        with open(target, 'rb') as bundle:
            data = bundle.read()
            digest = hashlib.md5(data).hexdigest()
        with open(target + '.md5', 'wb') as md5file:
            md5file.write(digest + '\n')
        if sys.stdout.isatty():
            print('bundle generated at "%s" md5: %s' % (target, digest))

    finally:
        shutil.rmtree(tmpdir)
    return 0


if __name__ == '__main__':
    orig = os.path.realpath(os.path.dirname(sys.argv[0]))
    target = os.path.join(orig, os.pardir, 'cache', BUNDLE_NAME)
    sys.exit(run(target))