File: regression_test_storage_size.py

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (87 lines) | stat: -rw-r--r-- 2,574 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import argparse
import subprocess
import tempfile

# The tolerated size increase before a run counts as a regression. Despite the
# name, the value is a fraction rather than a percentage: 0.05 means 5%.
regression_threshold_percentage = 0.05

parser = argparse.ArgumentParser(
    description='Compare the database size produced by an old and a new shell executable '
    'and fail when the new one produces a larger database.'
)
# Both runners are mandatory: without them the os.path.isfile() checks below
# would receive None and raise a TypeError instead of a readable usage error.
parser.add_argument(
    '--old', dest='old_runner', action='store', required=True, help='Path to the old shell executable'
)
parser.add_argument(
    '--new', dest='new_runner', action='store', required=True, help='Path to the new shell executable'
)

args = parser.parse_args()

old_runner = args.old_runner
new_runner = args.new_runner
# Process exit status; flipped to 1 as soon as any benchmark fails.
exit_code = 0

# Bail out early when either executable is missing, before any data is loaded.
if not os.path.isfile(old_runner):
    print(f"Failed to find old runner {old_runner}")
    exit(1)

if not os.path.isfile(new_runner):
    print(f"Failed to find new runner {new_runner}")
    exit(1)


def load_data(shell_path, load_script):
    """Run ``load_script`` against a fresh database and return the file size.

    The shell at ``shell_path`` is started with ``-storage_version latest``,
    a ``-c`` command setting ``storage_compatibility_version`` to ``latest``,
    a ``-c`` command containing ``load_script``, and finally the path of the
    database file to use.

    Args:
        shell_path: Path of the shell executable to run.
        load_script: Command passed to the shell to populate the database.

    Returns:
        The size in bytes of the database file the shell wrote, or ``None``
        when the shell exits with a non-zero status.
    """
    # A private scratch directory gives the shell a path that does not exist
    # yet, and guarantees that the database file (and anything the shell
    # writes next to it) is deleted once it has been measured, on both the
    # success and the failure path.
    with tempfile.TemporaryDirectory() as scratch_dir:
        filename = os.path.join(scratch_dir, 'storage_size.db')
        completed = subprocess.run(
            [
                shell_path,
                '-storage_version',
                'latest',
                '-c',
                "set storage_compatibility_version='latest'",
                '-c',
                load_script,
                filename,
            ]
        )
        if completed.returncode != 0:
            print('----------------------------')
            print('FAILED TO RUN')
            print('----------------------------')
            return None
        # Measure before leaving the ``with`` block: the file is gone afterwards.
        return os.path.getsize(filename)


def run_benchmark(load_script, benchmark_name):
    """Load the same data with the old and the new runner and compare sizes.

    The old runner is measured first, then the new one. Both sizes are
    printed, and the new size, reduced by the regression threshold, is
    compared against the old size.

    Args:
        load_script: Command handed to ``load_data`` to populate the database.
        benchmark_name: Human-readable name used in the progress output.

    Returns:
        ``True`` when the size check passes. ``False`` when either load fails
        or the new database is larger than the threshold allows.
    """
    banner = '----------------------------'

    def announce(message):
        # Every status message is framed by the same separator line.
        print(banner)
        print(message)
        print(banner)

    announce(f'Running benchmark {benchmark_name}')

    measured = []
    for runner in (old_runner, new_runner):
        size = load_data(runner, load_script)
        if size is None:
            # Stop at the first runner that fails; the other is not measured.
            return False
        measured.append(size)
    old_size, new_size = measured

    print(f'Database size with old runner: {old_size}')
    print(f'Database size with new runner: {new_size}')

    # The tolerance is taken off the new size before comparing with the old.
    discounted_new_size = new_size - new_size * regression_threshold_percentage
    if discounted_new_size > old_size:
        announce('FAILURE: SIZE INCREASE')
        return False

    announce('SUCCESS!')
    return True


# Load commands handed to the shell for each benchmark data set.
tpch_load = 'CALL dbgen(sf=1);'
tpcds_load = 'CALL dsdgen(sf=1);'


# Each entry pairs a load command with the name used in the output.
benchmarks = [[tpch_load, 'TPC-H SF1'], [tpcds_load, 'TPC-DS SF1']]

# Run every benchmark even after a failure so that all results get reported;
# a single failing benchmark is enough to make the script exit non-zero.
for load_script, benchmark_name in benchmarks:
    if run_benchmark(load_script, benchmark_name):
        continue
    print(f'Database size increased in {benchmark_name}')
    exit_code = 1

exit(exit_code)