File: extract_scripts.py

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (105 lines) | stat: -rwxr-xr-x 3,128 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path
from typing import Any, Dict
from typing_extensions import TypedDict  # Python 3.11+

import yaml


# A single entry of a workflow job's `steps` list, as loaded from YAML.
Step = Dict[str, Any]


class Script(TypedDict):
    """A script pulled out of a single workflow step."""

    # File suffix for the extracted script, including the leading dot
    # (for example ".sh" or ".js").
    extension: str
    # Full text of the script.
    script: str


def extract(step: Step) -> Script | None:
    """Return the script embedded in a workflow step, if it has one.

    Two kinds of step carry a script:

    * a step with a ``run`` key whose ``shell`` (``bash`` when the key is
      absent) is one of the shells listed below; and
    * a step that ``uses`` ``actions/github-script@...`` and provides a
      ``with.script`` input.

    Args:
        step: One entry of a job's ``steps`` list, as parsed from YAML.

    Returns:
        A mapping with the file ``extension`` (leading dot included) and the
        ``script`` text, or ``None`` when the step has no recognised script
        (including ``run`` steps that use an unlisted shell).
    """
    run = step.get("run")

    # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#using-a-specific-shell
    shell = step.get("shell", "bash")
    extension = {
        "bash": ".sh",
        "pwsh": ".ps1",
        "python": ".py",
        "sh": ".sh",
        "cmd": ".cmd",
        "powershell": ".ps1",
    }.get(shell)

    if run is not None and extension is not None:
        # For the POSIX shells, prepend a shebang and fail-fast options so the
        # extracted file carries its interpreter and error-handling flags;
        # every other shell gets the `run` text unchanged.
        script = {
            "bash": f"#!/usr/bin/env bash\nset -eo pipefail\n{run}",
            "sh": f"#!/usr/bin/env sh\nset -e\n{run}",
        }.get(shell, run)
        return {"extension": extension, "script": script}

    # A key written with no value (e.g. a bare `with:`) parses to None rather
    # than being absent, so `.get(key, default)` alone is not enough here.
    uses = step.get("uses") or ""
    gh_script = (step.get("with") or {}).get("script")
    if uses.startswith("actions/github-script@") and gh_script is not None:
        return {"extension": ".js", "script": gh_script}

    return None


def main() -> None:
    """Write every script embedded in ``.github/workflows`` to ``--out``.

    Each step that ``extract`` recognises is written to
    ``<out>/<workflow path>/<job name>/<index>_<sanitized step name><ext>``,
    where ``<index>`` is the 1-based position of the step within its job,
    zero-padded to a common width so the files sort in step order.

    Exits with an error message if ``--out`` already exists, or (after all
    scripts have been written) if any script contains a GitHub Actions
    ``${{ ... }}`` expression; the offending steps are listed on stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    out = Path(args.out)
    if out.exists():
        sys.exit(f"{out} already exists; aborting to avoid overwriting")

    gha_expressions_found = False

    # Sorted so that diagnostics come out in a stable order between runs.
    for p in sorted(Path(".github/workflows").iterdir()):
        # Workflow files must end in .yml or .yaml; skip sub-directories and
        # anything else that happens to live in the directory.
        if not p.is_file() or p.suffix not in {".yml", ".yaml"}:
            continue

        with open(p, "rb") as f:
            workflow = yaml.safe_load(f)

        for job_name, job in workflow["jobs"].items():
            if "steps" not in job:
                continue
            job_dir = out / p / job_name
            steps = job["steps"]
            # Indices run from 1 to len(steps) inclusive, so the padding
            # width has to fit len(steps) itself.
            index_chars = len(str(len(steps)))
            for i, step in enumerate(steps, start=1):
                extracted = extract(step)
                if not extracted:
                    continue

                script = extracted["script"]
                step_name = step.get("name", "")
                if "${{" in script:
                    gha_expressions_found = True
                    print(
                        f"{p} job `{job_name}` step {i}: {step_name}",
                        file=sys.stderr,
                    )

                job_dir.mkdir(parents=True, exist_ok=True)

                # Collapse every run of characters other than ASCII letters
                # and underscores into a single "_"; the leading "_"
                # separates the name from the numeric index.
                sanitized = re.sub(
                    "[^a-zA-Z_]+",
                    "_",
                    f"_{step_name}",
                ).rstrip("_")
                extension = extracted["extension"]
                filename = f"{i:0{index_chars}}{sanitized}{extension}"
                # Explicit encoding: scripts may contain non-ASCII text and
                # the locale default is not guaranteed to be UTF-8.
                (job_dir / filename).write_text(script, encoding="utf-8")

    if gha_expressions_found:
        sys.exit(
            "Each of the above scripts contains a GitHub Actions "
            "${{ <expression> }} which must be replaced with an `env` variable"
            " for security reasons."
        )


# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()