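"""Update test/slow_tests.json with tests that take over a minute on CI.

Queries ClickHouse for average per-test durations on recent viable/strict
commits, rewrites test/slow_tests.json, and opens (or updates) a pull request
with the change, which pytorchbot then approves and asks to merge.
"""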
import json
import os
import subprocess
import time
from pathlib import Path
from typing import Any, cast, Dict, List, Optional, Tuple

import requests
from clickhouse import query_clickhouse  # type: ignore[import]

REPO_ROOT = Path(__file__).resolve().parent.parent.parent
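# Average duration of each test over the three most recent viable/strict
# commits, keeping only tests that average more than 60 seconds. Periodic
# workflows, asan jobs, and skipped/failed runs are excluded.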
QUERY = """
WITH most_recent_strict_commits AS (
    SELECT DISTINCT
        push.head_commit.'id' AS sha
    FROM
        -- not bothering with final
        default.push
    WHERE
        push.ref = 'refs/heads/viable/strict'
        AND push.repository.'full_name' = 'pytorch/pytorch'
    ORDER BY
        push.head_commit.'timestamp' DESC
    LIMIT
        3
), workflows AS (
    SELECT
        id
    FROM
        default.workflow_run w FINAL
    WHERE
        w.id IN (
            SELECT id FROM materialized_views.workflow_run_by_head_sha
            WHERE head_sha IN (SELECT sha FROM most_recent_strict_commits)
        )
        AND w.name != 'periodic'
),
job AS (
    SELECT
        j.id AS id
    FROM
        default.workflow_job j FINAL
    WHERE
        j.run_id IN (SELECT id FROM workflows)
        AND j.name NOT LIKE '%asan%'
),
duration_per_job AS (
    SELECT
        test_run.classname AS classname,
        test_run.name AS name,
        job.id AS id,
        SUM(test_run.time) AS time
    FROM
        default.test_run_s3 test_run
        INNER JOIN job ON test_run.job_id = job.id
    WHERE
        /* cpp tests do not populate `file` for some reason. */
        /* Exclude them as we don't include them in our slow test infra */
        test_run.file != ''
        /* do some more filtering to cut down on the test_run size */
        AND empty(test_run.skipped)
        AND empty(test_run.failure)
        AND empty(test_run.error)
        AND test_run.job_id IN (SELECT id FROM job)
    GROUP BY
        test_run.classname,
        test_run.name,
        job.id
)
SELECT
    CONCAT(name, ' (__main__.', classname, ')') AS test_name,
    AVG(time) AS avg_duration_sec
FROM
    duration_per_job
GROUP BY
    CONCAT(name, ' (__main__.', classname, ')')
HAVING
    AVG(time) > 60.0
ORDER BY
    test_name
"""
UPDATEBOT_TOKEN = os.environ["UPDATEBOT_TOKEN"]
PYTORCHBOT_TOKEN = os.environ["PYTORCHBOT_TOKEN"]
def git_api(
    url: str, params: Dict[str, Any], type: str = "get", token: str = UPDATEBOT_TOKEN
) -> Any:
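    """Call the GitHub REST API and return the decoded JSON response.

    `type` selects the HTTP method: "get" (default), "post", or "patch".
    """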
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"token {token}",
    }
    if type == "post":
        return requests.post(
            f"https://api.github.com{url}",
            data=json.dumps(params),
            headers=headers,
        ).json()
    elif type == "patch":
        return requests.patch(
            f"https://api.github.com{url}",
            data=json.dumps(params),
            headers=headers,
        ).json()
    else:
        return requests.get(
            f"https://api.github.com{url}",
            params=params,
            headers=headers,
        ).json()


def make_pr(source_repo: str, params: Dict[str, Any]) -> int:
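    """Open a pull request against `source_repo` and return its number."""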
    response = git_api(f"/repos/{source_repo}/pulls", params, type="post")
    print(f"made pr {response['html_url']}")
    return cast(int, response["number"])


def approve_pr(source_repo: str, pr_number: int) -> None:
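    """Approve the pull request as pytorchbot."""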
params = {"event": "APPROVE"}
# use pytorchbot to approve the pr
git_api(
f"/repos/{source_repo}/pulls/{pr_number}/reviews",
params,
type="post",
token=PYTORCHBOT_TOKEN,
)
def make_comment(source_repo: str, pr_number: int, msg: str) -> None:
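    """Leave a comment on the pull request as pytorchbot."""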
params = {"body": msg}
# comment with pytorchbot because pytorchmergebot gets ignored
git_api(
f"/repos/{source_repo}/issues/{pr_number}/comments",
params,
type="post",
token=PYTORCHBOT_TOKEN,
)
def add_labels(source_repo: str, pr_number: int, labels: List[str]) -> None:
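    """Add `labels` to the pull request."""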
params = {"labels": labels}
git_api(
f"/repos/{source_repo}/issues/{pr_number}/labels",
params,
type="post",
)
def search_for_open_pr(
source_repo: str, search_string: str
) -> Optional[Tuple[int, str]]:
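    """Find the most recent open updatebot PR whose title contains `search_string`.

    Returns its (number, branch name), or None if no such PR exists.
    """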
    params = {
        "q": f"is:pr is:open in:title author:pytorchupdatebot repo:{source_repo} {search_string}",
        "sort": "created",
    }
    response = git_api("/search/issues", params)
    if response["total_count"] != 0:
        # pr does exist
        pr_num = response["items"][0]["number"]
        link = response["items"][0]["html_url"]
        response = git_api(f"/repos/{source_repo}/pulls/{pr_num}", {})
        branch_name = response["head"]["ref"]
        print(
            f"pr does exist, number is {pr_num}, branch name is {branch_name}, link is {link}"
        )
        return pr_num, branch_name
    return None
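# This is meant to run from a pytorch/pytorch checkout with push access, e.g.
# in the weekly workflow referenced in the PR body below.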
if __name__ == "__main__":
    results = query_clickhouse(QUERY, {})
    slow_tests = {row["test_name"]: row["avg_duration_sec"] for row in results}
    with open(REPO_ROOT / "test" / "slow_tests.json", "w") as f:
        json.dump(slow_tests, f, indent=2)

    branch_name = f"update_slow_tests_{int(time.time())}"
    pr_num = None
    open_pr = search_for_open_pr("pytorch/pytorch", "Update slow tests")
    if open_pr is not None:
        # reuse the existing PR's branch so the force push below updates it
        pr_num, branch_name = open_pr
    subprocess.run(["git", "checkout", "-b", branch_name], cwd=REPO_ROOT)
    subprocess.run(["git", "add", "test/slow_tests.json"], cwd=REPO_ROOT)
    subprocess.run(["git", "commit", "-m", "Update slow tests"], cwd=REPO_ROOT)
    subprocess.run(
        ["git", "push", "--set-upstream", "origin", branch_name, "-f"],
        cwd=REPO_ROOT,
    )

    params = {
        "title": "Update slow tests",
        "head": branch_name,
        "base": "main",
        "body": "This PR is auto-generated weekly by [this action](https://github.com/pytorch/pytorch/blob/main/"
        + ".github/workflows/weekly.yml).\nUpdate the list of slow tests.",
    }
    if pr_num is None:
        # no existing pr, so make a new one and approve it
        pr_num = make_pr("pytorch/pytorch", params)
        # give GitHub a moment to register the new PR before modifying it
        time.sleep(5)
        add_labels("pytorch/pytorch", pr_num, ["ciflow/slow", "ci-no-td"])
        approve_pr("pytorch/pytorch", pr_num)
    make_comment("pytorch/pytorch", pr_num, "@pytorchbot merge")