# Tool to help categorize commits.
import argparse
import os
import textwrap
from common import categories, topics, get_commit_data_cache
from commitlist import CommitList
class Categorizer:
    """Interactive prompt loop that assigns a category and a topic to commits.

    The commit list is loaded with ``CommitList.from_existing(path)``; only
    the commits whose category equals ``category`` are presented to the user.
    After every answered prompt the whole list is saved again through
    ``CommitList.write_to_disk()``.
    """

    def __init__(self, path, category='Uncategorized'):
        """Load the commit list stored at ``path`` and remember the filter.

        Args:
            path: location handed to ``CommitList.from_existing``.
            category: category whose commits should be (re-)categorized.
        """
        self.cache = get_commit_data_cache()
        self.commits = CommitList.from_existing(path)

        # Special categories: 'Uncategorized'
        # All other categories must be real
        self.category = category

    def categorize(self):
        """Prompt for every commit that currently has ``self.category``."""
        commits = self.commits.filter(category=self.category)
        total_commits = len(self.commits.commits)
        already_done = total_commits - len(commits)
        i = 0
        while i < len(commits):
            cur_commit = commits[i]
            # Remember the successor before prompting so the loop can resume
            # from it once the current commit has been handled.
            next_commit = commits[i + 1] if i + 1 < len(commits) else None
            jump_to = self.handle_commit(cur_commit, already_done + i + 1, total_commits, commits)

            # Increment counter
            if jump_to is not None:
                i = jump_to
            elif next_commit is None:
                i = len(commits)
            else:
                i = commits.index(next_commit)

    def features(self, commit):
        """Return the cached data stored under ``commit.commit_hash``."""
        return self.cache.get(commit.commit_hash)

    @staticmethod
    def _strip_pr_suffix(title):
        """Return ``title`` without a trailing ``' (#<digits>)'`` PR reference."""
        stripped = title.rstrip()
        head, sep, tail = stripped.rpartition(' (#')
        if sep and tail.endswith(')') and tail[:-1].isdigit():
            return head
        return stripped

    def potential_reverts_of(self, commit, commits):
        """Collect hints that ``commit`` may have been reverted later on.

        Args:
            commit: the commit being inspected.
            commits: the list being walked; it must contain ``commit``.

        Returns:
            An empty list for submodule-update commits. Otherwise a dict that
            holds ``'GithubBot': 'Reverted'`` when the commit carries the
            ``Reverted`` label, plus one ``position -> commit`` entry for every
            later commit whose title contains this commit's title.
        """
        submodule_update_str = ['Update TensorPipe submodule',
                                'Updating submodules',
                                'Automated submodule update']
        if any(a in commit.title for a in submodule_update_str):
            return []

        features = self.features(commit)
        if 'Reverted' in features.labels:
            reasons = {'GithubBot': "Reverted"}
        else:
            reasons = {}

        index = commits.index(commit)
        # Drop the trailing " (#35011)"-style PR reference, but only when it is
        # really there: chopping a fixed number of characters damages titles
        # without the suffix and turns short titles into '' (which would then
        # "match" every later commit).
        cleaned_title = self._strip_pr_suffix(commit.title)
        if not cleaned_title:
            return reasons

        # NB: the index + 2 is sketch
        reasons.update({(index + 2 + delta): cand for delta, cand in enumerate(commits[index + 1:])
                        if cleaned_title in cand.title and
                        commit.commit_hash != cand.commit_hash})
        return reasons

    @staticmethod
    def _match_prefix(value, options):
        """Return the options selected by ``value``; an exact match wins."""
        if value in options:
            # Without this, an option that is a prefix of another option could
            # never be picked because the prefix search stays ambiguous.
            return [value]
        return [opt for opt in options if opt.startswith(value)]

    def _prompt_choice(self, prompt, options, current):
        """Ask on stdin until exactly one of ``options`` is selected.

        Empty input keeps ``current``; if ``current`` is None the question is
        asked again.
        """
        while True:
            value = input(prompt).strip()
            if len(value) == 0:
                if current is not None:
                    return current
                continue
            choices = self._match_prefix(value, options)
            if len(choices) != 1:
                print(f'Possible matches: {choices}, try again')
                continue
            return choices[0]

    def handle_commit(self, commit, i, total, commits):
        """Show one commit, ask for its category and topic, and store them.

        Args:
            commit: the commit to categorize.
            i: counter displayed to the user.
            total: total displayed next to the counter.
            commits: the list being walked (used to look for reverts).

        Returns:
            Always None, which tells ``categorize`` to simply move on.
        """
        potential_reverts = self.potential_reverts_of(commit, commits)
        if potential_reverts:
            potential_reverts = f'!!!POTENTIAL REVERTS!!!: {potential_reverts}'
        else:
            potential_reverts = ""

        features = self.features(commit)

        breaking_alarm = ""
        if 'module: bc-breaking' in features.labels:
            breaking_alarm += "\n!!!!!! BC BREAKING !!!!!!"
        if 'module: deprecation' in features.labels:
            breaking_alarm += "\n!!!!!! DEPRECATION !!!!!!"

        os.system('clear')
        # The template is kept flush-left on purpose: it is a runtime string.
        view = textwrap.dedent(f'''\
[{i}/{total}]
================================================================================
{features.title}
{potential_reverts} {breaking_alarm}
{features.body}
Files changed: {features.files_changed}
Labels: {features.labels}
Current category: {commit.category}
Select from: {', '.join(categories)}
''')
        print(view)

        cat_choice = self._prompt_choice('category> ', categories, commit.category)
        print(f'\nSelected: {cat_choice}')

        print(f'\nCurrent topic: {commit.topic}')
        print(f'''Select from: {', '.join(topics)}''')
        topic_choice = self._prompt_choice('topic> ', topics, commit.topic)
        print(f'\nSelected: {topic_choice}')

        self.update_commit(commit, cat_choice, topic_choice)
        return None

    def update_commit(self, commit, category, topic):
        """Set ``category`` and ``topic`` on ``commit`` and save the list.

        Both values must be members of ``categories`` / ``topics``.
        """
        assert category in categories
        assert topic in topics
        commit.category = category
        commit.topic = topic
        self.commits.write_to_disk()
def main():
    """Read the command-line options and start the interactive categorizer."""
    arg_parser = argparse.ArgumentParser(description='Tool to help categorize commits')
    arg_parser.add_argument(
        '--category',
        type=str,
        default='Uncategorized',
        help='Which category to filter by. "Uncategorized", None, or a category name',
    )
    arg_parser.add_argument(
        '--file',
        help='The location of the commits CSV',
        default='results/commitlist.csv',
    )
    options = arg_parser.parse_args()

    # Build the categorizer from the CSV path and category filter, then run it.
    Categorizer(options.file, options.category).categorize()
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|