File: committers.py

package info (click to toggle)
qtwebkit-opensource-src 5.7.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 291,692 kB
  • ctags: 268,122
  • sloc: cpp: 1,360,420; python: 70,286; ansic: 42,986; perl: 35,476; ruby: 12,236; objc: 9,465; xml: 8,396; asm: 3,873; yacc: 2,397; sh: 1,647; makefile: 650; lex: 644; java: 110
file content (273 lines) | stat: -rw-r--r-- 11,526 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# Copyright (c) 2011, Apple Inc. All rights reserved.
# Copyright (c) 2009, 2011, 2012 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for committer and reviewer validation.

import fnmatch
import json

from webkitpy.common.editdistance import edit_distance
from webkitpy.common.memoized import memoized
from webkitpy.common.system.filesystem import FileSystem


# The list of contributors have been moved to contributors.json


class Contributor(object):
    def __init__(self, name, email_or_emails, irc_nickname_or_nicknames=None):
        assert(name)
        assert(email_or_emails)
        self.full_name = name
        if isinstance(email_or_emails, str):
            self.emails = [email_or_emails]
        else:
            self.emails = email_or_emails
        self.emails = map(lambda email: email.lower(), self.emails)  # Emails are case-insensitive.
        if isinstance(irc_nickname_or_nicknames, str):
            self.irc_nicknames = [irc_nickname_or_nicknames]
        else:
            self.irc_nicknames = irc_nickname_or_nicknames
        self.can_commit = False
        self.can_review = False

    def bugzilla_email(self):
        # FIXME: We're assuming the first email is a valid bugzilla email,
        # which might not be right.
        return self.emails[0]

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __unicode__(self):
        return '"%s" <%s>' % (self.full_name, self.emails[0])

    def contains_string(self, search_string):
        string = search_string.lower()
        if string in self.full_name.lower():
            return True
        if self.irc_nicknames:
            for nickname in self.irc_nicknames:
                if string in nickname.lower():
                    return True
        for email in self.emails:
            if string in email:
                return True
        return False

    def matches_glob(self, glob_string):
        if fnmatch.fnmatch(self.full_name, glob_string):
            return True
        if self.irc_nicknames:
            for nickname in self.irc_nicknames:
                if fnmatch.fnmatch(nickname, glob_string):
                    return True
        for email in self.emails:
            if fnmatch.fnmatch(email, glob_string):
                return True
        return False


class Committer(Contributor):
    def __init__(self, name, email_or_emails, irc_nickname=None):
        Contributor.__init__(self, name, email_or_emails, irc_nickname)
        self.can_commit = True


class Reviewer(Committer):
    def __init__(self, name, email_or_emails, irc_nickname=None):
        Committer.__init__(self, name, email_or_emails, irc_nickname)
        self.can_review = True


class CommitterList(object):

    # Committers and reviewers are passed in to allow easy testing
    def __init__(self,
                 committers=[],
                 reviewers=[],
                 contributors=[]):
        # FIXME: These arguments only exist for testing. Clean it up.
        if not (committers or reviewers or contributors):
            loaded_data = self.load_json()
            contributors = loaded_data['Contributors']
            committers = loaded_data['Committers']
            reviewers = loaded_data['Reviewers']

        self._contributors = contributors + committers + reviewers
        self._committers = committers + reviewers
        self._reviewers = reviewers
        self._contributors_by_name = {}
        self._accounts_by_email = {}
        self._accounts_by_login = {}

    @staticmethod
    @memoized
    def load_json():
        filesystem = FileSystem()
        json_path = filesystem.join(filesystem.dirname(filesystem.path_to_module('webkitpy.common.config')), 'contributors.json')
        contributors = json.loads(filesystem.read_text_file(json_path))

        return {
            'Contributors': [Contributor(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Contributors'].iteritems()],
            'Committers': [Committer(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Committers'].iteritems()],
            'Reviewers': [Reviewer(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Reviewers'].iteritems()],
        }

    def contributors(self):
        return self._contributors

    def committers(self):
        return self._committers

    def reviewers(self):
        return self._reviewers

    def _name_to_contributor_map(self):
        if not len(self._contributors_by_name):
            for contributor in self._contributors:
                assert(contributor.full_name)
                assert(contributor.full_name.lower() not in self._contributors_by_name)  # We should never have duplicate names.
                self._contributors_by_name[contributor.full_name.lower()] = contributor
        return self._contributors_by_name

    def _email_to_account_map(self):
        if not len(self._accounts_by_email):
            for account in self._contributors:
                for email in account.emails:
                    assert(email not in self._accounts_by_email)  # We should never have duplicate emails.
                    self._accounts_by_email[email] = account
        return self._accounts_by_email

    def _login_to_account_map(self):
        if not len(self._accounts_by_login):
            for account in self._contributors:
                if account.emails:
                    login = account.bugzilla_email()
                    assert(login not in self._accounts_by_login)  # We should never have duplicate emails.
                    self._accounts_by_login[login] = account
        return self._accounts_by_login

    def _committer_only(self, record):
        if record and not record.can_commit:
            return None
        return record

    def _reviewer_only(self, record):
        if record and not record.can_review:
            return None
        return record

    def committer_by_name(self, name):
        return self._committer_only(self.contributor_by_name(name))

    def contributor_by_irc_nickname(self, irc_nickname):
        for contributor in self.contributors():
            # FIXME: This should do case-insensitive comparison or assert that all IRC nicknames are in lowercase
            if contributor.irc_nicknames and irc_nickname in contributor.irc_nicknames:
                return contributor
        return None

    def contributors_by_search_string(self, string):
        glob_matches = filter(lambda contributor: contributor.matches_glob(string), self.contributors())
        return glob_matches or filter(lambda contributor: contributor.contains_string(string), self.contributors())

    def contributors_by_email_username(self, string):
        string = string + '@'
        result = []
        for contributor in self.contributors():
            for email in contributor.emails:
                if email.startswith(string):
                    result.append(contributor)
                    break
        return result

    def _contributor_name_shorthands(self, contributor):
        if ' ' not in contributor.full_name:
            return []
        split_fullname = contributor.full_name.split()
        first_name = split_fullname[0]
        last_name = split_fullname[-1]
        return first_name, last_name, first_name + last_name[0], first_name + ' ' + last_name[0]

    def _tokenize_contributor_name(self, contributor):
        full_name_in_lowercase = contributor.full_name.lower()
        tokens = [full_name_in_lowercase] + full_name_in_lowercase.split()
        if contributor.irc_nicknames:
            return tokens + [nickname.lower() for nickname in contributor.irc_nicknames if len(nickname) > 5]
        return tokens

    def contributors_by_fuzzy_match(self, string):
        string_in_lowercase = string.lower()

        # 1. Exact match for fullname, email and irc_nicknames
        account = self.contributor_by_name(string_in_lowercase) or self.contributor_by_email(string_in_lowercase) or self.contributor_by_irc_nickname(string_in_lowercase)
        if account:
            return [account], 0

        # 2. Exact match for email username (before @)
        accounts = self.contributors_by_email_username(string_in_lowercase)
        if accounts and len(accounts) == 1:
            return accounts, 0

        # 3. Exact match for first name, last name, and first name + initial combinations such as "Dan B" and "Tim H"
        accounts = [contributor for contributor in self.contributors() if string in self._contributor_name_shorthands(contributor)]
        if accounts and len(accounts) == 1:
            return accounts, 0

        # 4. Finally, fuzzy-match using edit-distance
        string = string_in_lowercase
        contributorWithMinDistance = []
        minDistance = len(string) / 2 - 1
        for contributor in self.contributors():
            tokens = self._tokenize_contributor_name(contributor)
            editdistances = [edit_distance(token, string) for token in tokens if abs(len(token) - len(string)) <= minDistance]
            if not editdistances:
                continue
            distance = min(editdistances)
            if distance == minDistance:
                contributorWithMinDistance.append(contributor)
            elif distance < minDistance:
                contributorWithMinDistance = [contributor]
                minDistance = distance
        if not len(contributorWithMinDistance):
            return [], len(string)
        return contributorWithMinDistance, minDistance

    def contributor_by_email(self, email):
        return self._email_to_account_map().get(email.lower()) if email else None

    def contributor_by_name(self, name):
        return self._name_to_contributor_map().get(name.lower()) if name else None

    def committer_by_email(self, email):
        return self._committer_only(self.contributor_by_email(email))

    def reviewer_by_email(self, email):
        return self._reviewer_only(self.contributor_by_email(email))