# File: regression_check.py
# Source: duckdb 1.5.1-2 (Debian package, area: main), scripts/regression_check.py
import os
import sys
import duckdb
import numpy
import subprocess
from io import StringIO
import csv
import statistics

# Paths to the old (baseline) and new CSV timing files, taken from the CLI.
old_file = None
new_file = None
# the threshold at which we consider something a regression (percentage)
regression_threshold_percentage = 0.1
# minimal seconds diff for something to be a regression (for very fast benchmarks)
regression_threshold_seconds = 0.01

# Parse --old=<file> and --new=<file> from the command line.
for arg in sys.argv:
    if arg.startswith("--old="):
        # replace with count=1 so an "--old=" substring inside the path itself is untouched
        old_file = arg.replace("--old=", "", 1)
    elif arg.startswith("--new="):
        new_file = arg.replace("--new=", "", 1)

if old_file is None or new_file is None:
    # Fixed typo in the usage string: it previously read "--new-<new_file>".
    print("Usage: python scripts/regression_check.py --old=<old_file> --new=<new_file>")
    exit(1)

# Load per-benchmark median timings from both CSV files into name -> time maps.
con = duckdb.connect()


def _load_median_timings(path):
    # Each CSV row is (benchmark name, run number, time); DuckDB aggregates
    # the runs down to one median timing per benchmark name.
    rows = con.execute(
        f"SELECT name, median(time) FROM read_csv_auto('{path}') t(name, nrun, time) GROUP BY ALL ORDER BY ALL"
    ).fetchall()
    return {name: timing for name, timing in rows}


old_timings = _load_median_timings(old_file)
new_timings = _load_median_timings(new_file)

# Benchmarks whose new median timing exceeds the regression threshold.
slow_keys = []
multiply_percentage = 1.0 + regression_threshold_percentage

test_keys = sorted(new_timings.keys())

for key in test_keys:
    new_timing = new_timings[key]
    old_timing = old_timings.get(key)
    if old_timing is None:
        # Benchmark exists only in the new run (newly added) — there is no
        # baseline to compare against, so it cannot be flagged as a regression.
        # Previously this raised a KeyError and aborted the whole check.
        continue
    # A regression must exceed both the absolute floor (for very fast
    # benchmarks) and the relative percentage threshold.
    if (old_timing + regression_threshold_seconds) * multiply_percentage < new_timing:
        slow_keys.append(key)

# Report the results: detail each regression and dump both raw timing files
# when regressions were found, then always print the side-by-side timings.
return_code = 0
if len(slow_keys) > 0:
    print(
        '''====================================================
==============  REGRESSIONS DETECTED   =============
====================================================
'''
    )
    return_code = 1
    for key in slow_keys:
        # slow_keys only contains benchmarks present in both runs, so direct
        # indexing is safe here.
        new_timing = new_timings[key]
        old_timing = old_timings[key]
        print(key)
        print(f"Old timing: {old_timing}")
        print(f"New timing: {new_timing}")
        print("")

    print(
        '''====================================================
==================  New Timings   ==================
====================================================
'''
    )
    with open(new_file, 'r') as f:
        print(f.read())
    print(
        '''====================================================
==================  Old Timings   ==================
====================================================
'''
    )
    with open(old_file, 'r') as f:
        print(f.read())
else:
    print(
        '''====================================================
============== NO REGRESSIONS DETECTED  =============
====================================================
'''
    )

print(
    '''====================================================
=================== ALL TIMINGS  ===================
====================================================
'''
)
for key in test_keys:
    new_timing = new_timings[key]
    # test_keys comes from new_timings, so a newly added benchmark may have no
    # baseline; .get avoids the KeyError the original code would raise and
    # prints None for the missing old timing instead.
    old_timing = old_timings.get(key)
    print(key)
    print(f"Old timing: {old_timing}")
    print(f"New timing: {new_timing}")
    print("")

exit(return_code)