File: run_benchmark.py

package info (click to toggle)
duckdb 1.5.1-3
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 564
file content (52 lines) | stat: -rw-r--r-- 1,865 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import argparse
import os
import subprocess
import re

# Command-line interface: the CLI binary to drive, the database file to open,
# the directory holding the query files, and how many times each query is run.
parser = argparse.ArgumentParser(description='Run a full benchmark using the CLI and report the results.')
parser.add_argument('--shell', action='store', help='Path to the CLI', default='build/reldebug/duckdb')
# Required: a missing value would end up as None inside the subprocess command line.
parser.add_argument(
    '--database', action='store', help='Path to the database file to load data from', required=True
)
# Required: a missing value would make os.listdir(None) silently scan the current directory.
parser.add_argument(
    '--queries',
    action='store',
    help='Path to the list of queries to run (e.g. benchmark/clickbench/queries)',
    required=True,
)
# type=int: argparse returns command-line values as strings, but the run count
# is used with range() and compared against a list length further down.
parser.add_argument('--nrun', action='store', help='The number of runs', default=3, type=int)

args = parser.parse_args()

# At least one run is needed to obtain a timing for each query.
if args.nrun < 1:
    parser.error('--nrun must be at least 1')

# Timing lines this script looks for in the CLI's stdout (the timer is switched
# on via `.timer on` below). The decimal point is escaped so that only a literal
# '.' is accepted between the digit groups.
timing_pattern = re.compile(r'Run Time \(s\): real (\d+\.\d+)')

# Run every file in the query directory (in sorted order) through the CLI and
# collect one timing per query file.
queries = os.listdir(args.queries)
queries.sort()
ran_queries = []
timings = []
for q in queries:
    # Any entry whose name contains 'load.sql' is skipped rather than benchmarked.
    if 'load.sql' in q:
        continue

    # One CLI invocation per query file: enable the timer, then read the same
    # file args.nrun times within that single process.
    read_command = '.read ' + os.path.join(args.queries, q)
    command = [args.shell, args.database, '-c', '.timer on']
    command += ['-c', read_command] * args.nrun
    res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # errors='replace' keeps the script from dying on non-UTF-8 bytes in the
    # CLI output; the timing lines themselves are plain ASCII.
    stdout = res.stdout.decode('utf8', errors='replace').strip()
    stderr = res.stderr.decode('utf8', errors='replace').strip()
    results = timing_pattern.findall(stdout)

    # A run counts as failed if the CLI exited non-zero, reported an error on
    # stderr, or did not produce exactly one timing line per requested run.
    if res.returncode != 0 or 'Error:\n' in stderr or len(results) != args.nrun:
        print("------- Failed to run query -------")
        print(q)
        print("------- stdout -------")
        print(stdout)
        print("------- stderr -------")
        print(stderr)
        # SystemExit instead of exit(): the latter is only injected by the site module.
        raise SystemExit(1)

    results = [float(x) for x in results]
    print(f"Timings for {q}: " + str(results))
    ran_queries.append(q)
    # Report the second run when there is one; with a single run fall back to
    # that run instead of failing with an IndexError.
    timings.append(results[1] if len(results) > 1 else results[0])

# Print a blank separator line followed by a single SQL statement that unnests
# the query names and their timings side by side as columns `query` and `timing`.
print('')
# Double any embedded single quote so that a file name containing ' still
# produces a valid SQL string literal.
quoted_names = ["'" + name.replace("'", "''") + "'" for name in ran_queries]
timing_values = [str(value) for value in timings]
sql_query = (
    'SELECT UNNEST(['
    + ','.join(quoted_names)
    + ']) as query,UNNEST(['
    + ','.join(timing_values)
    + ']) as timing;'
)
print(sql_query)