File: classify_prs.py

package info (click to toggle)
pytorch-vision 0.14.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 15,188 kB
  • sloc: python: 49,008; cpp: 10,019; sh: 610; java: 550; xml: 79; objc: 56; makefile: 32
file content (138 lines) | stat: -rw-r--r-- 3,465 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# In[1]:

# imports and set configuration
import pandas as pd
from retrieve_prs_data import run

exclude_prototype = True
data_filename = "10.0_to_11.0-rc2.json"
previous_release = "v10.0"
current_release = "v11.0-rc2"

# In[2]:


df = pd.read_json(data_filename).T
df.tail()


# In[3]:


all_labels = {lbl for labels in df["labels"] for lbl in labels}
all_labels


# In[4]:


# Add one column per label
for label in all_labels:
    df[label] = df["labels"].apply(lambda labels_list: label in labels_list)
df.head()


# In[5]:


# Add a clean "module" column. It contains tuples since PRs can have more than one module.
# Maybe we should include "topics" in that column as well?

all_modules = {  # mapping: full name -> clean name
    label: "".join(label.split(" ")[1:]) for label in all_labels if label.startswith("module")
}

# We use an ugly loop, but whatever ¯\_(ツ)_/¯
df["module"] = [[] for _ in range(len(df))]
for i, row in df.iterrows():
    for full_name, clean_name in all_modules.items():
        if full_name in row["labels"]:
            row["module"].append(clean_name)
df["module"] = df.module.apply(tuple)
df.head()


# In[6]:


mod_df = df.set_index("module").sort_index()
mod_df.tail()


# In[7]:


# All improvement PRs
mod_df[mod_df["enhancement"]].head()


# In[8]:


# improvement f module
# note: don't filter module name on the index as the index contain tuples with non-exclusive values
# Use the boolean column instead
mod_df[mod_df["enhancement"] & mod_df["module: transforms"]]


# In[9]:


def format_prs(mod_df):
    out = []
    for idx, row in mod_df.iterrows():
        if exclude_prototype and row["prototype"]:
            continue
        modules = idx
        # Put "documentation" and "tests" first for sorting to be dece
        for last_module in ("documentation", "tests"):
            if last_module in modules:
                modules = [m for m in modules if m != last_module] + [last_module]

        module = f"[{', '.join(modules)}]"
        module = module.replace("referencescripts", "reference scripts")
        module = module.replace("code", "reference scripts")
        out.append(f"{module} {row['title']}")

    return "\n".join(out)


# In[10]:


included_prs = pd.DataFrame()

# If labels are accurate, this shouhld generate most of the release notes already
# We keep track of the included PRs to figure out which ones are missing
for section_title, module_idx in (
    ("Backward-incompatible changes", "bc-breaking"),
    ("Deprecations", "deprecation"),
    ("New Features", "new feature"),
    ("Improvements", "enhancement"),
    ("Bug Fixes", "bug"),
    ("Code Quality", "code quality"),
):
    print(f"## {section_title}")
    print()
    tmp_df = mod_df[mod_df[module_idx]]
    included_prs = pd.concat([included_prs, tmp_df])
    print(format_prs(tmp_df))
    print()


# In[11]:


# Missing PRs are these ones... classify them manually
missing_prs = pd.concat([mod_df, included_prs]).drop_duplicates(subset="pr_number", keep=False)
print(format_prs(missing_prs))

# In[12]:

# Generate list of contributors
print()
print("## Contributors")

command_to_run = f"{{ git shortlog -s {previous_release}..{current_release} | cut -f2- & git log -s {previous_release}..{current_release} | grep Co-authored | cut -f2- -d: | cut -f1 -d\\< | sed 's/^ *//;s/ *$//' ; }} | sort --ignore-case | uniq | tr '\\n' ';' | sed 's/;/, /g;s/, $//' | fold -s"
rc, output, err = run(command_to_run)
print(output)