# In[1]:
# Imports and configuration
import pandas as pd
from retrieve_prs_data import run
exclude_prototype = True
data_filename = "10.0_to_11.0-rc2.json"
previous_release = "v10.0"
current_release = "v11.0-rc2"
# In[2]:
df = pd.read_json(data_filename).T
df.tail()
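# Optional sanity check (a sketch, assuming the schema produced by retrieve_prs_data):
# the columns used later in this notebook should be present in the JSON.
expected_columns = {"pr_number", "title", "labels"}
assert expected_columns.issubset(df.columns), f"Missing columns: {expected_columns - set(df.columns)}"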
# In[3]:
all_labels = {lbl for labels in df["labels"] for lbl in labels}
all_labels
# In[4]:
# Add one column per label
for label in all_labels:
    df[label] = df["labels"].apply(lambda labels_list: label in labels_list)
df.head()
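# Optional: a quick look at how often each label occurs, using the boolean
# columns we just added. Purely for inspection, not needed for the notes.
df[sorted(all_labels)].sum().sort_values(ascending=False).head(20)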
# In[5]:
# Add a clean "module" column. It contains tuples since PRs can have more than one module.
# Maybe we should include "topics" in that column as well?
all_modules = { # mapping: full name -> clean name
label: "".join(label.split(" ")[1:]) for label in all_labels if label.startswith("module")
}
# We use an ugly loop, but whatever ¯\_(ツ)_/¯
df["module"] = [[] for _ in range(len(df))]
for i, row in df.iterrows():
    for full_name, clean_name in all_modules.items():
        if full_name in row["labels"]:
            row["module"].append(clean_name)
df["module"] = df.module.apply(tuple)
df.head()
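# Optional: distribution of PRs per module. explode() splits the tuples so each
# module gets its own row; PRs with no module label are dropped from the counts.
df["module"].explode().value_counts()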
# In[6]:
mod_df = df.set_index("module").sort_index()
mod_df.tail()
# In[7]:
# All improvement PRs
mod_df[mod_df["enhancement"]].head()
# In[8]:
# Improvement PRs for a given module
# Note: don't filter on the module name via the index, as the index contains tuples with non-exclusive values.
# Use the boolean columns instead.
mod_df[mod_df["enhancement"] & mod_df["module: transforms"]]
# In[9]:
def format_prs(mod_df):
    out = []
    for idx, row in mod_df.iterrows():
        if exclude_prototype and row["prototype"]:
            continue
        modules = idx
        # Put "documentation" and "tests" last in the module list so the sorting is decent
        for last_module in ("documentation", "tests"):
            if last_module in modules:
                modules = [m for m in modules if m != last_module] + [last_module]
        module = f"[{', '.join(modules)}]"
        module = module.replace("referencescripts", "reference scripts")
        module = module.replace("code", "reference scripts")
        out.append(f"{module} {row['title']}")
    return "\n".join(out)
# In[10]:
included_prs = pd.DataFrame()
# If the labels are accurate, this should generate most of the release notes already.
# We keep track of the included PRs to figure out which ones are missing
for section_title, module_idx in (
("Backward-incompatible changes", "bc-breaking"),
("Deprecations", "deprecation"),
("New Features", "new feature"),
("Improvements", "enhancement"),
("Bug Fixes", "bug"),
("Code Quality", "code quality"),
):
print(f"## {section_title}")
print()
tmp_df = mod_df[mod_df[module_idx]]
included_prs = pd.concat([included_prs, tmp_df])
print(format_prs(tmp_df))
print()
# In[11]:
# The missing PRs are the ones below; classify them manually.
missing_prs = pd.concat([mod_df, included_prs]).drop_duplicates(subset="pr_number", keep=False)
print(format_prs(missing_prs))
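# Optional: a quick count of how many PRs are left to classify by hand.
print(f"{len(missing_prs)} PRs left to classify manually")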
# In[12]:
# Generate list of contributors
print()
print("## Contributors")
command_to_run = f"{{ git shortlog -s {previous_release}..{current_release} | cut -f2- & git log -s {previous_release}..{current_release} | grep Co-authored | cut -f2- -d: | cut -f1 -d\\< | sed 's/^ *//;s/ *$//' ; }} | sort --ignore-case | uniq | tr '\\n' ';' | sed 's/;/, /g;s/, $//' | fold -s"
rc, output, err = run(command_to_run)
print(output)