File: explicit_ci_jobs.py

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (160 lines) | stat: -rwxr-xr-x 5,005 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3

from __future__ import annotations

import argparse
import fnmatch
import subprocess
import textwrap
from pathlib import Path
from typing import Any

import yaml


# Repository root, reached by taking three `.parent` steps from this file's path.
REPO_ROOT = Path(__file__).parent.parent.parent
# CircleCI configuration file that this tool reads and rewrites.
CONFIG_YML = REPO_ROOT.joinpath(".circleci", "config.yml")
# Directory whose entries are matched against the --filter-gha glob.
WORKFLOWS_DIR = REPO_ROOT.joinpath(".github", "workflows")


# CircleCI workflows whose jobs are considered when filtering; every other
# workflow is skipped entirely. The following are formatted slightly
# differently and are therefore deliberately left out:
#   scheduled-ci, debuggable-scheduled-ci, slow-gradcheck-scheduled-ci, promote
WORKFLOWS_TO_CHECK = [
    "binary_builds",
    "build",
    "master_build",
]


def add_job(
    workflows: dict[str, Any],
    workflow_name: str,
    type: str,
    job: dict[str, Any],
    past_jobs: dict[str, Any],
) -> None:
    """
    Add job 'job' under 'type' and 'workflow_name' to 'workflows' in place.

    Every job named in the job's "requires" list is added first (recursively),
    each one to the workflow it was recorded under in 'past_jobs'. An entry
    that is already present in its workflow is not added a second time, so a
    job that is both requested directly and required by another job (or
    required by several jobs) appears only once.

    Args:
        workflows: mapping of workflow name to workflow dict; a missing
            workflow is created as {"when": "always", "jobs": []}.
        workflow_name: workflow the job is appended to.
        type: key under which the job is stored, i.e. the appended entry is
            {type: job}.
        job: the job's own dict; its optional "requires" list names the jobs
            it depends on.
        past_jobs: mapping of job name to a record with the keys
            "workflow_name", "type" and "job" for previously seen jobs.

    Raises:
        KeyError: if a required job is not present in 'past_jobs'.
    """
    if workflow_name not in workflows:
        workflows[workflow_name] = {"when": "always", "jobs": []}

    jobs = workflows[workflow_name]["jobs"]
    entry = {type: job}
    if entry in jobs:
        # Already added by an earlier call, which also pulled in its
        # dependencies; appending again would duplicate the job.
        return

    for requirement in job.get("requires") or ():
        try:
            dependency = past_jobs[requirement]
        except KeyError:
            raise KeyError(
                f"job {job.get('name')!r} requires {requirement!r}, "
                "which was not found among the previously seen jobs"
            ) from None
        add_job(
            workflows,
            dependency["workflow_name"],
            dependency["type"],
            dependency["job"],
            past_jobs,
        )

    jobs.append(entry)


def get_filtered_circleci_config(
    workflows: dict[str, Any], relevant_jobs: list[str]
) -> dict[str, Any]:
    """
    Build a new workflows mapping that keeps only the jobs named in
    'relevant_jobs' (plus whatever add_job pulls in for them).

    Only workflows listed in WORKFLOWS_TO_CHECK are inspected. Jobs without a
    "name" key are reported on stdout and ignored. Every named job is
    remembered after it has been processed so that jobs appearing later can
    refer to it as a dependency.
    """
    filtered: dict[str, Any] = {}
    seen_jobs: dict[str, Any] = {}

    for wf_name, wf in workflows.items():
        # Workflows outside the allow-list are ignored completely
        if wf_name in WORKFLOWS_TO_CHECK:
            for entry in wf["jobs"]:
                for job_type, job in entry.items():
                    if "name" not in job:
                        # Without a name the job cannot be selected or
                        # referenced, so there is nothing to do with it
                        print("Skipping", job_type)
                        continue

                    job_name = job["name"]
                    if job_name in relevant_jobs:
                        # Requested on the command line: copy it over
                        add_job(filtered, wf_name, job_type, job, seen_jobs)

                    # Remember the job for later dependency lookups
                    seen_jobs[job_name] = {
                        "workflow_name": wf_name,
                        "type": job_type,
                        "job": job,
                    }

    return filtered


def commit_ci(files: list[str], message: str) -> None:
    """
    Stage 'files' and commit them with 'message' using git.

    Before committing, the output of `git status --porcelain` is inspected
    and the commit is refused if any entry has something other than a space
    in its first status column (for example changes that are already staged),
    so unrelated work does not end up in the generated commit.

    Args:
        files: paths handed to `git add`.
        message: commit message passed to `git commit -m`.

    Raises:
        RuntimeError: if `git status --porcelain` reports an entry whose
            first status column is not a space.
        subprocess.CalledProcessError: if any of the git commands exits with
            a non-zero status.
    """
    # check=True: a failing git command must not be silently ignored
    stdout = subprocess.run(
        ["git", "status", "--porcelain"],
        stdout=subprocess.PIPE,
        check=True,
    ).stdout.decode()
    for line in stdout.split("\n"):
        if not line:
            continue
        if line[0] != " ":
            raise RuntimeError(
                f"Refusing to commit while other changes are already staged: {line}"
            )

    # Make the commit; "--" keeps git from reading a path as an option
    subprocess.run(["git", "add", "--", *files], check=True)
    subprocess.run(["git", "commit", "-m", message], check=True)

def build_commit_message(jobs: list[str]) -> str:
    """
    Return the do-not-merge commit message listing the selected CircleCI jobs.

    The message is assembled from unindented lines. Dedenting an indented
    template after the job list has been interpolated breaks once there is
    more than one job, because the second and later bullet lines carry only
    a single leading space and that becomes the common indentation.
    """
    jobs_str = "\n".join(f" * {job}" for job in jobs)
    return "\n".join(
        [
            "[skip ci][do not merge] Edit config.yml to filter specific jobs",
            "",
            "Filter CircleCI to only run:",
            jobs_str,
            "",
            "See [Run Specific CI Jobs](https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md#run-specific-ci-jobs) for details.",
        ]
    )


def main() -> None:
    """
    Rewrite CONFIG_YML so it only contains the jobs given with --job,
    optionally delete the entries of WORKFLOWS_DIR that do not match
    --filter-gha, and optionally commit the touched files.
    """
    parser = argparse.ArgumentParser(
        description="make .circleci/config.yml only have a specific set of jobs and delete GitHub actions"
    )
    parser.add_argument("--job", action="append", help="job name", default=[])
    parser.add_argument(
        "--filter-gha", help="keep only these github actions (glob match)", default=""
    )
    parser.add_argument(
        "--make-commit",
        action="store_true",
        help="add change to git with to a do-not-merge commit",
    )
    args = parser.parse_args()

    touched_files = [CONFIG_YML]
    with open(CONFIG_YML) as f:
        config_yml = yaml.safe_load(f.read())

    config_yml["workflows"] = get_filtered_circleci_config(
        config_yml["workflows"], args.job
    )

    with open(CONFIG_YML, "w") as f:
        yaml.dump(config_yml, f)

    if args.filter_gha:
        # iterdir() already yields paths prefixed with WORKFLOWS_DIR, so they
        # can be used directly without joining them onto REPO_ROOT again
        for path in WORKFLOWS_DIR.iterdir():
            if not fnmatch.fnmatch(path.name, args.filter_gha):
                touched_files.append(path)
                path.resolve().unlink()

    if args.make_commit:
        message = build_commit_message(args.job)
        commit_ci(
            [str(path.relative_to(REPO_ROOT)) for path in touched_files], message
        )


if __name__ == "__main__":
    main()