File: get_integration_test_params.py

package info (click to toggle)
sqlglot 28.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,672 kB
  • sloc: python: 84,517; sql: 22,534; makefile: 48
file content (166 lines) | stat: -rwxr-xr-x 5,990 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/env python3
"""
This script is intended to be used as part of a GitHub Actions workflow in order to decide if the integration tests should:

a) be triggered at all
b) if they should be triggered, should they be triggered for a subset of dialects or all dialects?

The tests can be triggered manually by using the following directive in the PR description:

 /integration-tests

To limit them to a certain dialect or dialects, you can specify:

 /integration-tests dialects=bigquery,duckdb

If you specify nothing, a `git diff` will be performed between your PR branch and the base branch.
If any files modified contain one of the SUPPORTED_DIALECTS in the filename, that dialect will be added to the
list of dialects to test. If no files match, the integration tests will be skipped.

Note that integration tests in the remote workflow are only implemented for a subset of dialects.
If new ones are added, update the SUPPORTED_DIALECTS constant below.

Each dialect is tested against itself (roundtrip) and duckdb (transpilation).
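Supplying a dialect not in this list will cause that dialect to be ignored. Note that if a manual trigger
names only unsupported dialects, no dialect filter is emitted at all (the same as a bare `/integration-tests`).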
"""

import typing as t
import os
import sys
import json
import subprocess
from pathlib import Path

# Prefix that marks a manual trigger line in the PR description. It is matched with
# `str.startswith`, so both "/integration-test" and "/integration-tests" are accepted.
TRIGGER = "/integration-test"
# Dialects that may be selected for testing. Names outside this list are filtered out of
# manual triggers, and only these names are searched for in changed file names.
SUPPORTED_DIALECTS = ["duckdb", "bigquery", "snowflake"]


def get_dialects_from_manual_trigger(trigger: str) -> t.Set[str]:
    """
    Parse the dialects requested by a manual trigger line.

    The line must start with TRIGGER. Everything after the directive itself is treated as
    whitespace-separated arguments. Only `key=value` arguments whose key starts with
    "dialect" are considered; their value is read as a comma-separated list of dialect
    names. Names that are not in SUPPORTED_DIALECTS are dropped.

    /integration-tests -> set()
    /integration-tests dialects=bigquery -> {"bigquery"}
    /integration-tests dialects=bigquery,duckdb -> {"bigquery", "duckdb"}
    /integration-tests dialects=exasol,duckdb -> {"duckdb"}

    Returns:
        The set of requested dialects that are supported (possibly empty).

    Raises:
        ValueError: if `trigger` does not start with TRIGGER.
    """
    if not trigger.startswith(TRIGGER):
        raise ValueError(f"Invalid trigger: {trigger}")

    # Drop the directive itself (covers both /integration-test and /integration-tests).
    # Splitting on any run of whitespace, rather than on a single space, keeps a
    # tab-separated argument from staying glued to the directive and being discarded.
    trigger_parts = trigger.split()[1:]

    print(f"Parsing trigger args: {trigger_parts}")

    requested: t.Set[str] = set()
    for part in trigger_parts:
        # `separator` is empty when the argument is not a key=value pair
        key, separator, value = part.partition("=")
        if separator and key.lower().startswith("dialect"):
            requested.update(name.lower().strip() for name in value.split(","))

    return requested.intersection(SUPPORTED_DIALECTS)


def get_dialects_from_git(base_ref: str, current_ref: str) -> t.Set[str]:
    """
    Takes two git refs and runs `git diff --name-only <base_ref> <current_ref>`

    If any of the returned file names contain a dialect from SUPPORTED_DIALECTS as
    a substring (case-insensitively), that dialect is included in the returned set.

    Returns:
        The set of supported dialects found in the changed file names (possibly empty).

    Raises:
        ValueError: if the git process exits with a non-zero status.
    """
    print(f"Checking for files changed between '{base_ref}' and '{current_ref}'")

    # stderr is captured separately so that git warnings are never parsed as file names
    result = subprocess.run(
        ["git", "diff", "--name-only", base_ref, current_ref],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    output = result.stdout.decode("utf8")
    # diagnostics are only ever displayed, so undecodable bytes must not abort the run
    diagnostics = result.stderr.decode("utf8", errors="replace")

    if result.returncode != 0:
        raise ValueError(
            f"Git process failed with exit code {result.returncode}:\n{output}{diagnostics}"
        )

    print(f"Git output:\n{output}")
    if diagnostics:
        print(f"Git stderr:\n{diagnostics}")

    changed_files = [line.strip().lower() for line in output.splitlines()]

    return {
        dialect
        for dialect in SUPPORTED_DIALECTS
        if any(dialect in file_name for file_name in changed_files)
    }


if __name__ == "__main__":
    # Entry point: reads the GitHub event payload, decides whether the integration tests
    # should run (and for which dialects), and appends the decision to the GITHUB_OUTPUT
    # file as `skip=<true|false>` and, optionally, `dialects=<comma-separated list>`.
    event_path_env = os.environ.get("GITHUB_EVENT_PATH")
    output_path_env = os.environ.get("GITHUB_OUTPUT")

    if not os.environ.get("GITHUB_ACTIONS") or not event_path_env or not output_path_env:
        print("This script needs to run within GitHub Actions")
        sys.exit(1)

    github_event_path = Path(event_path_env)
    github_output = Path(output_path_env)

    with github_event_path.open("r", encoding="utf-8") as f:
        event: t.Dict[str, t.Any] = json.load(f)

    print("Handling event: \n" + json.dumps(event, indent=2))

    # A key can be present with an explicit JSON null, in which case the default passed
    # to dict.get() is not used; `or` covers both the missing and the null case.
    pull_request: t.Dict[str, t.Any] = event.get("pull_request") or {}

    # for pull_request events, the body is located at github.event.pull_request.body
    # (null when the PR has no description)
    pr_description: str = pull_request.get("body") or ""

    dialects: t.Set[str] = set()
    should_run = False

    pr_description_lines = [line.strip().lower() for line in pr_description.splitlines()]
    if trigger_line := [line for line in pr_description_lines if line.startswith(TRIGGER)]:
        # if the user has explicitly requested /integration-tests then use that;
        # only the first trigger line is honoured
        print(f"Handling trigger line: {trigger_line[0]}")
        dialects = get_dialects_from_manual_trigger(trigger_line[0])
        # an explicit trigger always runs; an empty set means "no dialect filter"
        should_run = True
    else:
        # otherwise, do a git diff and inspect the changed files
        print("Explicit trigger line not detected; performing git diff")
        pull_request_base_ref = (pull_request.get("base") or {}).get("sha")
        if not pull_request_base_ref:
            raise ValueError("Unable to determine base ref")

        current_ref = (pull_request.get("head") or {}).get("sha")

        if not current_ref:
            raise ValueError("Unable to determine current/head ref")

        print(f"Comparing '{current_ref}' against '{pull_request_base_ref}'")
        # only trigger if a file relating to a supported dialect has changed
        dialects = get_dialects_from_git(base_ref=pull_request_base_ref, current_ref=current_ref)
        if dialects:
            should_run = True

    # sets have no stable iteration order; sort so that logs and outputs are reproducible
    ordered_dialects = sorted(dialects)

    if should_run:
        dialects_str = (
            f"the following dialects: {', '.join(ordered_dialects)}"
            if ordered_dialects
            else "all supported dialects"
        )
        print(f"Conclusion: should run tests for {dialects_str}")
    else:
        print("Conclusion: No tests to run")

    # write output variables
    lines = []
    if should_run:
        lines.append("skip=false")
        if ordered_dialects:
            lines.append(f"dialects={','.join(ordered_dialects)}")
    else:
        lines.append("skip=true")

    # append: the output file is supplied by the runner and may already hold other outputs
    with github_output.open("a", encoding="utf-8") as f:
        f.writelines(f"{line}\n" for line in lines)