File: optimize.py

package info (click to toggle)
sqlglot 27.6.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 96,288 kB
  • sloc: python: 73,347; sql: 20,230; javascript: 40; makefile: 39
file content (72 lines) | stat: -rw-r--r-- 1,910 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import sys
import os
import pyperf

# Add the project root to the path so we can import from tests
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from sqlglot.optimizer import optimize
from sqlglot import parse_one
from tests.helpers import load_sql_fixture_pairs, TPCH_SCHEMA, TPCDS_SCHEMA

# Deeply nested conditions currently require a lot of recursion, so the
# interpreter's recursion limit is raised at import time, before any of the
# benchmark inputs below are parsed or optimized.
sys.setrecursionlimit(10000)


def gen_condition(n):
    """Parse a synthetic condition made of ``n`` OR-ed clauses.

    Clause ``i`` (for ``i`` in ``range(n)``) has the text ``a = i AND b = i``;
    the clauses are joined with `` OR `` and the resulting string is handed to
    ``parse_one``, whose result is returned unchanged.
    """
    clauses = [f"a = {i} AND b = {i}" for i in range(n)]
    condition_sql = " OR ".join(clauses)
    return parse_one(condition_sql)


# Setup functions: each returns the (expressions, schema) inputs for one benchmark
def get_tpch_setup():
    """Build the inputs for the TPC-H optimizer benchmark.

    Returns:
        A ``(expressions, schema)`` tuple: the ``parse_one`` result for the SQL
        of every entry in the ``optimizer/tpc-h/tpc-h.sql`` fixture, paired
        with ``TPCH_SCHEMA``.
    """
    fixture_entries = load_sql_fixture_pairs("optimizer/tpc-h/tpc-h.sql")
    # Each entry unpacks into three items; only the middle one (the SQL) is used.
    parsed_queries = [parse_one(sql) for _, sql, _ in fixture_entries]
    return parsed_queries, TPCH_SCHEMA


def get_tpcds_setup():
    """Build the inputs for the TPC-DS optimizer benchmark.

    Returns:
        A ``(expressions, schema)`` tuple: the ``parse_one`` result for the SQL
        of every entry in the ``optimizer/tpc-ds/tpc-ds.sql`` fixture, paired
        with ``TPCDS_SCHEMA``.
    """
    fixture_entries = load_sql_fixture_pairs("optimizer/tpc-ds/tpc-ds.sql")
    # Each entry unpacks into three items; only the middle one (the SQL) is used.
    parsed_queries = [parse_one(sql) for _, sql, _ in fixture_entries]
    return parsed_queries, TPCDS_SCHEMA


def get_condition_10_setup():
    """Return ``(expressions, schema)`` for a single 10-clause condition and an empty schema."""
    expressions = [gen_condition(10)]
    schema = {}
    return expressions, schema


def get_condition_100_setup():
    """Return ``(expressions, schema)`` for a single 100-clause condition and an empty schema."""
    expressions = [gen_condition(100)]
    schema = {}
    return expressions, schema


def get_condition_1000_setup():
    """Return ``(expressions, schema)`` for a single 1000-clause condition and an empty schema."""
    expressions = [gen_condition(1000)]
    schema = {}
    return expressions, schema


# Optimizer functions that will be benchmarked
def optimize_queries(expressions, schema):
    """Run ``optimize`` once per expression, in order, with the given schema.

    This is the callable handed to the benchmark runner. The value returned
    by each ``optimize`` call is discarded and nothing is returned.

    Args:
        expressions: Iterable of expressions to optimize.
        schema: Schema passed as the second positional argument to ``optimize``.
    """
    for expression in expressions:
        optimize(expression, schema)


def run_benchmarks():
    """Register every optimizer benchmark with a single ``pyperf.Runner``.

    For each benchmark, its setup function is called here to produce the
    ``(expressions, schema)`` inputs. Only ``optimize_queries`` and those
    inputs are passed to ``runner.bench_func``, under the name
    ``optimize_<benchmark name>``.
    """
    runner = pyperf.Runner()

    # (benchmark name, setup function) pairs, processed in this order
    suites = (
        ("tpch", get_tpch_setup),
        # ("tpcds", get_tpcds_setup) is left out because it's too slow in CI
        ("condition_10", get_condition_10_setup),
        ("condition_100", get_condition_100_setup),
        ("condition_1000", get_condition_1000_setup),
    )

    for suite_name, build_inputs in suites:
        queries, schema = build_inputs()
        runner.bench_func("optimize_" + suite_name, optimize_queries, queries, schema)


# Run the benchmarks only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    run_benchmarks()