File: categorize.py

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (146 lines) | stat: -rw-r--r-- 5,067 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import argparse
import os
import re
import textwrap

from common import categories, topics, get_commit_data_cache
from commitlist import CommitList

class Categorizer:
    """Interactive terminal helper for assigning a category and topic to commits.

    The commit list is loaded from ``path`` via ``CommitList.from_existing``
    and per-commit metadata (title, body, labels, files changed) is looked up
    through the cache returned by ``get_commit_data_cache()``. Every accepted
    choice is written back to disk immediately.
    """

    # Trailing pull-request reference in a commit title, e.g. " (#35011)".
    _PR_SUFFIX = re.compile(r'\s*\(#\d+\)\s*$')

    # Title fragments of automated submodule bumps. Their titles repeat from
    # commit to commit, so title matching would report bogus reverts for them.
    _SUBMODULE_UPDATE_MARKERS = (
        'Update TensorPipe submodule',
        'Updating submodules',
        'Automated submodule update',
    )

    def __init__(self, path, category='Uncategorized'):
        """Load the commit list stored at ``path``.

        Args:
            path: Location of the commit list, passed to
                ``CommitList.from_existing``.
            category: Only commits currently in this category are presented
                by :meth:`categorize`.
        """
        self.cache = get_commit_data_cache()
        self.commits = CommitList.from_existing(path)

        # Special categories: 'Uncategorized'
        # All other categories must be real
        self.category = category

    def categorize(self):
        """Walk through every commit in ``self.category`` and prompt for each one."""
        commits = self.commits.filter(category=self.category)
        total_commits = len(self.commits.commits)
        # Commits outside the filter count as already handled in the
        # "[i/total]" progress display.
        already_done = total_commits - len(commits)
        i = 0
        while i < len(commits):
            cur_commit = commits[i]
            # The successor is captured before prompting and located again
            # afterwards, instead of blindly incrementing the index.
            next_commit = commits[i + 1] if i + 1 < len(commits) else None
            jump_to = self.handle_commit(cur_commit, already_done + i + 1, total_commits, commits)

            # Increment counter
            if jump_to is not None:
                i = jump_to
            elif next_commit is None:
                i = len(commits)
            else:
                i = commits.index(next_commit)

    def features(self, commit):
        """Return the cached metadata for ``commit``, looked up by its hash."""
        return self.cache.get(commit.commit_hash)

    def potential_reverts_of(self, commit, commits):
        """Collect hints that ``commit`` was reverted.

        Args:
            commit: The commit being inspected; must be an element of
                ``commits``.
            commits: The sequence of commits currently being categorized.

        Returns:
            An empty list for automated submodule updates. Otherwise a dict
            that may contain ``'GithubBot': 'Reverted'`` (when the commit
            carries the ``Reverted`` label) plus one entry per later commit
            whose title contains this commit's title with the trailing
            ``(#NNNN)`` reference removed. Those entries are keyed by
            ``index + 2 + delta`` and map to the candidate commit.
        """
        if any(marker in commit.title for marker in self._SUBMODULE_UPDATE_MARKERS):
            return []

        features = self.features(commit)
        if 'Reverted' in features.labels:
            reasons = {'GithubBot': "Reverted"}
        else:
            reasons = {}

        index = commits.index(commit)
        # Strip the trailing "(#35011)" reference whatever its width; a fixed
        # slice is only right for one particular PR-number length.
        cleaned_title = self._PR_SUFFIX.sub('', commit.title).strip()
        if not cleaned_title:
            # An empty needle is a substring of every title and would flag
            # every later commit as a revert.
            return reasons

        # NB: the index + 2 is sketch
        for delta, cand in enumerate(commits[index + 1:]):
            if cleaned_title in cand.title and commit.commit_hash != cand.commit_hash:
                reasons[index + 2 + delta] = cand
        return reasons

    @staticmethod
    def _match_prefix(value, options):
        """Return the options selected by ``value``, without duplicates.

        An exact match wins outright, so an option that is also a prefix of
        another option can still be selected. Otherwise every option that
        starts with ``value`` is returned.
        """
        unique_options = list(dict.fromkeys(options))
        if value in unique_options:
            return [value]
        return [opt for opt in unique_options if opt.startswith(value)]

    def _prompt_choice(self, prompt, options, default):
        """Prompt until the user picks exactly one of ``options``.

        Empty input keeps ``default``. If ``default`` is ``None`` the prompt
        is simply repeated.
        """
        while True:
            value = input(prompt).strip()
            if not value:
                if default is not None:
                    return default
                continue
            choices = self._match_prefix(value, options)
            if len(choices) == 1:
                return choices[0]
            print(f'Possible matches: {choices}, try again')

    def handle_commit(self, commit, i, total, commits):
        """Show one commit, prompt for its category and topic, and save them.

        Args:
            commit: The commit to categorize.
            i: Position shown in the ``[i/total]`` progress header.
            total: Total number of commits shown in the progress header.
            commits: The sequence being walked; used to look for reverts.

        Returns:
            Always ``None``. ``categorize`` treats a non-``None`` return
            value as the index to jump to next.
        """
        potential_reverts = self.potential_reverts_of(commit, commits)
        if potential_reverts:
            potential_reverts = f'!!!POTENTIAL REVERTS!!!: {potential_reverts}'
        else:
            potential_reverts = ""

        features = self.features(commit)

        breaking_alarm = ""
        if 'module: bc-breaking' in features.labels:
            breaking_alarm += "\n!!!!!! BC BREAKING !!!!!!"

        if 'module: deprecation' in features.labels:
            breaking_alarm += "\n!!!!!! DEPRECATION !!!!!!"

        # 'clear' does not exist on Windows shells; use 'cls' there.
        os.system('cls' if os.name == 'nt' else 'clear')
        view = textwrap.dedent(f'''\
[{i}/{total}]
================================================================================
{features.title}

{potential_reverts} {breaking_alarm}

{features.body}

Files changed: {features.files_changed}

Labels: {features.labels}

Current category: {commit.category}

Select from: {', '.join(categories)}

        ''')
        print(view)
        cat_choice = self._prompt_choice('category> ', categories, commit.category)
        print(f'\nSelected: {cat_choice}')
        print(f'\nCurrent topic: {commit.topic}')
        print(f'''Select from: {', '.join(topics)}''')
        topic_choice = self._prompt_choice('topic> ', topics, commit.topic)
        print(f'\nSelected: {topic_choice}')
        self.update_commit(commit, cat_choice, topic_choice)
        return None

    def update_commit(self, commit, category, topic):
        """Store ``category`` and ``topic`` on ``commit`` and save the commit list.

        Both values must be members of the module-level ``categories`` and
        ``topics`` collections respectively.
        """
        assert category in categories, f'unknown category: {category!r}'
        assert topic in topics, f'unknown topic: {topic!r}'
        commit.category = category
        commit.topic = topic
        self.commits.write_to_disk()

def main():
    """Parse the command-line options and start an interactive categorization run."""
    arg_parser = argparse.ArgumentParser(
        description='Tool to help categorize commits',
    )
    arg_parser.add_argument(
        '--category',
        default='Uncategorized',
        type=str,
        help='Which category to filter by. "Uncategorized", None, or a category name',
    )
    arg_parser.add_argument(
        '--file',
        default='results/commitlist.csv',
        help='The location of the commits CSV',
    )

    options = arg_parser.parse_args()
    # Build the categorizer from the parsed options and start prompting.
    Categorizer(options.file, options.category).categorize()


# Start the interactive categorizer only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()