1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
|
#!/usr/bin/env python3
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##
from typing import List, Dict, Tuple, Optional
import copy
import csv
import itertools
import json
import os
import pathlib
import re
import subprocess
# Number of the 'Libc++ Standards Conformance' project on Github. It is kept as a string
# because it is passed verbatim as a command-line argument to the `gh project` CLI.
LIBCXX_CONFORMANCE_PROJECT = '31'
def extract_between_markers(text: str, begin_marker: str, end_marker: str) -> Optional[str]:
    """
    Given a string containing special markers, extract everything located between these markers.

    If the beginning marker is not found, None is returned. If the beginning marker is found but
    there is no end marker, it is an error (this is done to avoid silently accepting inputs that
    are erroneous by mistake).

    Only the first occurrence of the beginning marker is considered, and the end marker is
    searched for after it. The returned text is not stripped.

    Raises ValueError if the beginning marker is present but the end marker is missing.
    """
    start = text.find(begin_marker)
    if start == -1:
        return None

    start += len(begin_marker) # skip the marker itself
    end = text.find(end_marker, start)
    if end == -1:
        # ValueError is a builtin; the previously used `ArgumentError` name does not exist
        # and would have surfaced as a confusing NameError instead of this message.
        raise ValueError(f"Could not find end marker {end_marker} in: {text[start:]}")

    return text[start:end]
class PaperStatus:
    """
    Implementation status of a paper or LWG issue.

    A status can be created from a CSV status entry (see from_csv_entry) or from a Github
    project item (see from_github_issue), and can be formatted back as a CSV entry.

    Statuses compare equal when they represent the same state. For ordering purposes,
    DONE and NOTHING_TO_DO are considered equally advanced: neither is less than the other,
    even though they do not compare equal.
    """
    TODO = 1
    IN_PROGRESS = 2
    PARTIAL = 3
    DONE = 4
    NOTHING_TO_DO = 5

    _status: int

    _original: Optional[str]
    """
    Optional string from which the paper status was created. This is used to carry additional
    information from CSV rows, like any notes associated to the status.
    """

    def __init__(self, status: int, original: Optional[str] = None):
        self._status = status
        self._original = original

    def __eq__(self, other) -> bool:
        # Let Python fall back to its default handling when compared with an unrelated
        # object (e.g. None) instead of failing with an AttributeError.
        if not isinstance(other, PaperStatus):
            return NotImplemented
        return self._status == other._status

    def __lt__(self, other) -> bool:
        if not isinstance(other, PaperStatus):
            return NotImplemented
        # DONE and NOTHING_TO_DO share the same rank: both mean no work is left.
        relative_order = {
            PaperStatus.TODO: 0,
            PaperStatus.IN_PROGRESS: 1,
            PaperStatus.PARTIAL: 2,
            PaperStatus.DONE: 3,
            PaperStatus.NOTHING_TO_DO: 3,
        }
        return relative_order[self._status] < relative_order[other._status]

    @staticmethod
    def from_csv_entry(entry: str):
        """
        Parse a paper status out of a CSV row entry. Entries can look like:
        - '' (an empty string, which means the paper is not done yet)
        - '|In Progress|'
        - '|Partial|'
        - '|Complete|'
        - '|Nothing To Do|'

        The entry is retained so that to_csv_entry() reproduces it verbatim.
        Raises RuntimeError for any other entry.
        """
        statuses = {
            '': PaperStatus.TODO,
            '|In Progress|': PaperStatus.IN_PROGRESS,
            '|Partial|': PaperStatus.PARTIAL,
            '|Complete|': PaperStatus.DONE,
            '|Nothing To Do|': PaperStatus.NOTHING_TO_DO,
        }
        if entry not in statuses:
            raise RuntimeError(f'Unexpected CSV entry for status: {entry}')
        return PaperStatus(statuses[entry], entry)

    @staticmethod
    def from_github_issue(issue: Dict):
        """
        Parse a paper status out of a Github issue obtained from querying a Github project.

        An issue without a 'status' key is treated as 'Todo'.
        Raises RuntimeError if the status is not one of the recognized values.
        """
        if 'status' not in issue:
            return PaperStatus(PaperStatus.TODO)

        statuses = {
            'Todo': PaperStatus.TODO,
            'In Progress': PaperStatus.IN_PROGRESS,
            'Partial': PaperStatus.PARTIAL,
            'Done': PaperStatus.DONE,
            'Nothing To Do': PaperStatus.NOTHING_TO_DO,
        }
        if issue['status'] not in statuses:
            raise RuntimeError(f"Received unrecognizable Github issue status: {issue['status']}")
        return PaperStatus(statuses[issue['status']])

    def to_csv_entry(self) -> str:
        """
        Return the issue state formatted for a CSV entry. The status is formatted as '|Complete|',
        '|In Progress|', etc.

        If the status was created from an original string, that string is returned unchanged.
        """
        mapping = {
            PaperStatus.TODO: '',
            PaperStatus.IN_PROGRESS: '|In Progress|',
            PaperStatus.PARTIAL: '|Partial|',
            PaperStatus.DONE: '|Complete|',
            PaperStatus.NOTHING_TO_DO: '|Nothing To Do|',
        }
        return self._original if self._original is not None else mapping[self._status]
class PaperInfo:
    """
    Tracking information for a single paper or LWG issue.

    A PaperInfo can be created from a row of a status-tracking CSV file (see from_csv_row)
    or from a Github project item (see from_github_issue), and can be formatted back into
    a CSV row with for_printing().
    """
    paper_number: str
    """
    Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'.
    """

    paper_name: str
    """
    Plain text string representing the name of the paper.
    """

    status: PaperStatus
    """
    Status of the paper/issue. This can be to do, in progress, partial, complete, or nothing to do.
    """

    meeting: Optional[str]
    """
    Plain text string representing the meeting at which the paper/issue was voted.
    """

    first_released_version: Optional[str]
    """
    First version of LLVM in which this paper/issue was resolved.
    """

    notes: Optional[str]
    """
    Optional plain text string representing notes to associate to the paper.
    This is used to populate the "Notes" column in the CSV status pages.
    """

    original: Optional[object]
    """
    Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that
    was used to generate this PaperInfo and is useful for error reporting purposes.
    """

    def __init__(self, paper_number: str, paper_name: str,
                       status: PaperStatus,
                       meeting: Optional[str] = None,
                       first_released_version: Optional[str] = None,
                       notes: Optional[str] = None,
                       original: Optional[object] = None):
        self.paper_number = paper_number
        self.paper_name = paper_name
        self.status = status
        self.meeting = meeting
        self.first_released_version = first_released_version
        self.notes = notes
        self.original = original

    def for_printing(self) -> Tuple[str, str, str, str, str, str]:
        """
        Return this paper formatted as a 6-column CSV row: link to the paper, name, meeting,
        status, first released version and notes. Missing optional fields become empty strings.
        """
        return (
            f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__',
            self.paper_name,
            self.meeting if self.meeting is not None else '',
            self.status.to_csv_entry(),
            self.first_released_version if self.first_released_version is not None else '',
            self.notes if self.notes is not None else '',
        )

    def __repr__(self) -> str:
        # Prefer showing the object this PaperInfo was created from, since that is what
        # a person has to look at when diagnosing a problem.
        return repr(self.original) if self.original is not None else repr(self.for_printing())

    @staticmethod
    def from_csv_row(row: Tuple[str, str, str, str, str, str]):# -> PaperInfo:
        """
        Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row.

        Empty meeting, version and notes columns are stored as None.
        Raises RuntimeError if no paper/issue number can be found in the first column.
        """
        # Extract the paper number from the first column
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0])
        if match is None:
            raise RuntimeError(f"Can't parse paper/issue number out of row: {row}")

        return PaperInfo(
            paper_number=match.group(1),
            paper_name=row[1],
            status=PaperStatus.from_csv_entry(row[3]),
            meeting=row[2] or None,
            first_released_version=row[4] or None,
            notes=row[5] or None,
            original=row,
        )

    @staticmethod
    def from_github_issue(issue: Dict):# -> PaperInfo:
        """
        Create a PaperInfo object from the Github issue information obtained from querying a Github Project.

        The paper number is parsed out of the issue title, and the 'number: ' prefix is removed
        from the title to obtain the paper name. Notes are taken from the text located between
        the BEGIN-RST-NOTES and END-RST-NOTES markers of the issue body, if any.

        Raises RuntimeError if no paper/issue number can be found in the issue title.
        """
        # Extract the paper number from the issue title
        title = issue['title']
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", title)
        if match is None:
            raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}")
        paper = match.group(1)

        # Issue titles look like '<number>: <name>'. Drop the number so that it is not
        # duplicated in the name column when this PaperInfo is written to a CSV row.
        prefix = f'{paper}: '
        paper_name = title[len(prefix):] if title.startswith(prefix) else title

        # Extract any notes from the Github issue and populate the RST notes with them
        issue_description = issue['content']['body']
        notes = extract_between_markers(issue_description, 'BEGIN-RST-NOTES', 'END-RST-NOTES')
        notes = notes.strip() if notes is not None else notes

        return PaperInfo(
            paper_number=paper,
            paper_name=paper_name,
            status=PaperStatus.from_github_issue(issue),
            meeting=issue.get('meeting Voted', None),
            first_released_version=None, # TODO
            notes=notes,
            original=issue,
        )
def merge(paper: PaperInfo, gh: PaperInfo) -> PaperInfo:
    """
    Combine the PaperInfo of a CSV row with the PaperInfo of the Github issue tracking it.

    The outcome depends on how the two statuses compare:
    - CSV row is 'To Do' and the issue is 'In Progress': the CSV row is kept (with the notes
      from the Github issue). Recording 'In Progress' only creates churn, and the status files
      document user-facing functionality in releases, for which it is not useful.
    - CSV row is less advanced than the issue: a copy of the Github information is returned.
    - Both have the same status: the CSV row is kept, with the notes from the Github issue.
    - Otherwise something must be wrong: a diagnostic is printed and the CSV row is kept as-is.

    The inputs are never modified; the result is always a deep copy.
    """
    csv_status = paper.status
    gh_status = gh.status
    todo_to_in_progress = (csv_status == PaperStatus(PaperStatus.TODO)
                           and gh_status == PaperStatus(PaperStatus.IN_PROGRESS))

    # Github is strictly ahead of the CSV row: take everything from Github.
    if not todo_to_in_progress and csv_status < gh_status:
        return copy.deepcopy(gh)

    # In every remaining case the CSV row is the basis of the result.
    merged = copy.deepcopy(paper)
    if todo_to_in_progress or csv_status == gh_status:
        merged.notes = gh.notes
    else:
        print(f"We found a CSV row and a Github issue with different statuses:\nrow: {paper}\nGithub issue: {gh}")
    return merged
def load_csv(file: pathlib.Path) -> List[Tuple]:
    """
    Read a comma-separated file and return all of its rows (header included) in file order.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(file, newline='') as stream:
        return list(csv.reader(stream, delimiter=','))
def write_csv(output: pathlib.Path, rows: List[Tuple]):
    """
    Write the given rows to a CSV file, overwriting it. Every field is quoted and lines
    are terminated by a single '\\n'.
    """
    with open(output, 'w', newline='') as stream:
        csv.writer(stream, quoting=csv.QUOTE_ALL, lineterminator='\n').writerows(rows)
def create_github_issue(paper: PaperInfo, labels: List[str]) -> None:
    """
    Create a new Github issue representing the given PaperInfo.

    The `gh` command line that would be run is printed and the user is asked for confirmation
    on standard input. Nothing is created unless the answer is exactly 'y'.

    Once the issue is created, its 'Meeting Voted' project field is set to the paper's meeting
    (when the paper has one) and its 'Status' project field is set to 'To Do'.

    The issue is always tagged with the 'libc++' label, in addition to the provided labels.
    Any failure of a `gh` invocation raises subprocess.CalledProcessError.
    """
    # Issue titles are plain text: turn RST double-backticks into single ones and drop backslashes.
    paper_name = paper.paper_name.replace('``', '`').replace('\\', '')

    create_cli = ['gh', 'issue', 'create', '--repo', 'llvm/llvm-project',
                    '--title', f'{paper.paper_number}: {paper_name}',
                    '--body', f'**Link:** https://wg21.link/{paper.paper_number}',
                    '--project', 'libc++ Standards Conformance',
                    '--label', 'libc++']

    for label in labels:
        create_cli += ['--label', label]

    print("Do you want to create the following issue?")
    print(create_cli)
    answer = input("y/n: ")
    if answer == 'n':
        print("Not creating issue")
        return
    elif answer != 'y':
        print(f"Invalid answer {answer}, skipping")
        return

    print("Creating issue")
    issue_link = subprocess.check_output(create_cli).decode().strip()
    print(f"Created tracking issue for {paper.paper_number}: {issue_link}")

    # Retrieve the "Github project item ID" by re-adding the issue to the project again,
    # even though we created it inside the project in the first place.
    item_add_cli = ['gh', 'project', 'item-add', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--url', issue_link, '--format', 'json']
    item = json.loads(subprocess.check_output(item_add_cli).decode().strip())

    # Then, adjust the 'Meeting Voted' field of that item. A paper may not have a meeting
    # (e.g. an empty column in the CSV row); passing None in the command line would raise
    # a TypeError after the issue has already been created, so skip the field in that case.
    if paper.meeting is not None:
        meeting_voted_cli = ['gh', 'project', 'item-edit',
                                '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
                                '--field-id', 'PVTF_lADOAQWwKc4AlOgtzgdUEXI', '--text', paper.meeting,
                                '--id', item['id']]
        subprocess.check_call(meeting_voted_cli)
    else:
        print(f"No meeting known for {paper.paper_number}, leaving the 'Meeting Voted' field unset")

    # And also adjust the 'Status' field of the item to 'To Do'.
    status_cli = ['gh', 'project', 'item-edit',
                    '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
                    '--field-id', 'PVTSSF_lADOAQWwKc4AlOgtzgdUBak', '--single-select-option-id', 'f75ad846',
                    '--id', item['id']]
    subprocess.check_call(status_cli)
def sync_csv(rows: List[Tuple], from_github: List[PaperInfo], create_new: bool, labels: Optional[List[str]] = None) -> List[Tuple]:
    """
    Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
    up-to-date (but potentially incomplete) tracking information from Github, this function returns the
    new CSV rows synchronized with the up-to-date information.

    The first row is treated as the header and is preserved as-is, as are "separator rows" (rows
    whose first column is empty). An empty list of rows yields an empty result.

    If `create_new` is True and a paper from the CSV file is not tracked on Github yet, this also prompts
    to create a new issue on Github for tracking it. In that case the created issue is tagged with the
    provided labels, which must not be None.

    Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date
    PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented,
    it is an error (i.e. the result is not a CSV row where the paper is *not* implemented).
    """
    # A file without even a header has nothing to synchronize.
    if not rows:
        return []

    # Index the Github issues by paper number once, instead of scanning the whole list for
    # every CSV row. Issues sharing a paper number are kept in their original order.
    issues_by_number: Dict[str, List[PaperInfo]] = {}
    for gh in from_github:
        issues_by_number.setdefault(gh.paper_number, []).append(gh)

    results = [rows[0]] # Start with the header
    for row in rows[1:]: # Skip the header
        # If the row contains empty entries, this is a "separator row" between meetings.
        # Preserve it as-is.
        if row[0] == "":
            results.append(row)
            continue

        paper = PaperInfo.from_csv_row(row)

        # Find any Github issues tracking this paper. Each row must have one and exactly one Github
        # issue tracking it, which we validate below.
        tracking = issues_by_number.get(paper.paper_number, [])

        # If there's more than one tracking issue, something is weird.
        if len(tracking) > 1:
            print(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")
            results.append(row)
            continue

        # If there is no tracking issue for that row and we are creating new issues, do that.
        # Otherwise just log that we're missing an issue.
        if len(tracking) == 0:
            if create_new:
                assert labels is not None, "Missing labels when creating new Github issues"
                create_github_issue(paper, labels=labels)
            else:
                print(f"Can't find any Github issue for CSV row: {row}")
            # Either way, the row itself is left untouched in this run.
            results.append(row)
            continue

        results.append(merge(paper, tracking[0]).for_printing())

    return results
# Status files to synchronize, given by file name inside the docs/Status directory.
# Each file is mapped to the labels passed to sync_csv(), which are applied to Github
# issues created for rows of that file.
CSV_FILES_TO_SYNC = {
    'Cxx17Issues.csv': ['c++17', 'lwg-issue'],
    'Cxx17Papers.csv': ['c++17', 'wg21 paper'],
    'Cxx20Issues.csv': ['c++20', 'lwg-issue'],
    'Cxx20Papers.csv': ['c++20', 'wg21 paper'],
    'Cxx23Issues.csv': ['c++23', 'lwg-issue'],
    'Cxx23Papers.csv': ['c++23', 'wg21 paper'],
    'Cxx2cIssues.csv': ['c++26', 'lwg-issue'],
    'Cxx2cPapers.csv': ['c++26', 'wg21 paper'],
}
def main(argv):
    """
    Entry point of the script. `argv` is the list of command-line arguments, without the program name.

    The CSV status files are first validated by parsing every non-separator row. Unless
    --validate-only is given, the Github project items are then loaded (from the file given
    by --load-github-from, or by querying the `gh` CLI) and each CSV file is rewritten in
    place with the synchronized rows.
    """
    import argparse
    parser = argparse.ArgumentParser(prog='synchronize-status-files',
        description='Synchronize the libc++ conformance status files with Github issues')
    parser.add_argument('--validate-only', action='store_true',
        help="Only perform the data validation of CSV files.")
    parser.add_argument('--create-new', action='store_true',
        help="Create new Github issues for CSV rows that do not correspond to any existing Github issue.")
    parser.add_argument('--load-github-from', type=str,
        help="A json file to load the Github project information from instead of querying the API. This is useful for testing to avoid rate limiting.")
    options = parser.parse_args(argv)

    # The status files live in docs/Status, relative to the parent of this script's directory.
    status_dir = pathlib.Path(os.path.abspath(__file__)).parent.parent / 'docs' / 'Status'

    # Perform data validation for all the CSV files.
    print("Performing data validation of the CSV files")
    for filename in CSV_FILES_TO_SYNC:
        data_rows = load_csv(status_dir / filename)[1:] # Skip the header
        for row in data_rows:
            if row[0] == "": # Skip separator rows
                continue
            PaperInfo.from_csv_row(row)

    if options.validate_only:
        return

    # Load all the Github issues tracking papers from Github.
    if not options.load_github_from:
        print("Loading all issues from Github")
        gh_command_line = ['gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--format', 'json', '--limit', '9999999']
        project_info = json.loads(subprocess.check_output(gh_command_line))
    else:
        print(f"Loading all issues from {options.load_github_from}")
        with open(options.load_github_from, 'r') as f:
            project_info = json.load(f)
    from_github = [PaperInfo.from_github_issue(item) for item in project_info['items']]

    # Synchronize CSV files with the Github issues.
    for (filename, labels) in CSV_FILES_TO_SYNC.items():
        print(f"Synchronizing {filename} with Github issues")
        status_file = status_dir / filename
        current_rows = load_csv(status_file)
        write_csv(status_file, sync_csv(current_rows, from_github, create_new=options.create_new, labels=labels))
# Only run the synchronization when this file is executed as a script, forwarding the
# command-line arguments (without the program name) to main().
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])
|