File: git-contributors.py

package info (click to toggle)
xfsprogs 6.18.0-2
  • links: PTS
  • area: main
  • in suites:
  • size: 11,480 kB
  • sloc: ansic: 167,330; sh: 4,604; makefile: 1,337; python: 835; cpp: 5
file content (168 lines) | stat: -rwxr-xr-x 5,750 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env python3

# List all contributors to a series of git commits.
# Copyright(C) 2025 Oracle, All Rights Reserved.
# Licensed under GPL 2.0 or later

import re
import subprocess
import io
import sys
import argparse
import email.utils

# Module-wide debug switch.  When true, backtick() and find_developers
# print what they are running/parsing.  main() sets it from --debug.
DEBUG = False

def backtick(args):
    '''Generator function that yields lines of a program's stdout.

    args is the argument vector handed to subprocess.Popen.  Each line
    of the child's standard output is decoded as UTF-8 and yielded with
    its line terminator still attached.

    Once the output is exhausted (or the generator is closed early) the
    pipe is closed and the child process is waited for, so that no
    zombie process or open file descriptor is left behind.  The child's
    exit status is not checked.
    '''
    if DEBUG:
        print(' '.join(args))
    # Popen's context manager closes the pipe and reaps the child on exit.
    with subprocess.Popen(args, stdout = subprocess.PIPE) as p:
        with io.TextIOWrapper(p.stdout, encoding="utf-8") as out:
            yield from out

class find_developers(object):
    '''Collect the email addresses named in commit message trailers.

    run() scans lines of text for the trailer tags listed in __init__
    (Signed-off-by, Reviewed-by, Cc, ...) and returns the email
    addresses found on the right hand side of those trailers.
    '''

    def __init__(self):
        '''Compile the regular expressions used to take trailers apart.'''
        tags = '|'.join((
            'signed-off-by',
            'acked-by',
            'cc',
            'reviewed-by',
            'reported-by',
            'tested-by',
            'suggested-by',
            'reported-and-tested-by',
        ))
        # some tag, a colon, a space, and everything after that
        regex1 = r'^(%s):\s+(.+)$' % tags

        self.r1 = re.compile(regex1, re.I)

        # regex to guess if this is a list of multiple addresses.
        # The initial "^.*" is needed because match() only matches at
        # the start of the string.  The pattern wants two commas that
        # are each followed by an '@', so a name containing a comma
        # ("Xu, Wen <wen.xu@gatech.edu>") is not mistaken for a list.
        # NOTE(review): this also means "a@x.org, b@y.org" (only one
        # comma) is treated as a single address.
        self.r2 = re.compile(r'^.*,[^,]*@[^@]*,[^,]*@', re.I)

        # regex to match on anything inside a pair of angle brackets
        self.r3 = re.compile(r'^.*<(.+)>', re.I)

    def _handle_addr(self, addr):
        '''Return the email address found in one trailer value.

        addr is the text to the right of the trailer tag (or one comma
        separated piece of it).  Returns the extracted address, or the
        whitespace-stripped input if no address could be recognized;
        this may be an empty string.
        '''
        # The next split removes everything after an octothorpe (hash
        # mark), because someone could have provided an improperly
        # formatted email address:
        #
        # Cc: stable@vger.kernel.org # v6.19+
        #
        # This, according to my reading of RFC5322, is allowed because
        # octothorpes can be part of atom text.  However, it is
        # interpreted as if there weren't any whitespace
        # ("stable@vger.kernel.org#v6.19+").  The grammar allows for
        # this form, even though this is not a correct Internet domain
        # name.
        #
        # Worse, if you follow the format specified in the kernel's
        # SubmittingPatches file:
        #
        # Cc: <stable@vger.kernel.org> # v6.9
        #
        # email.utils will not know how to parse this, and returns
        # empty strings.  I think this is because the angle-addr
        # specification allows only whitespace between the closing
        # angle bracket and the CRLF.
        #
        # Hack around both problems by ignoring everything after an
        # octothorpe, no matter where it occurs in the string.  If
        # someone has one in their name or the email address, too bad.
        a = addr.split('#')[0]

        # email.utils can extract email addresses from headers that
        # roughly follow the destination address field format:
        #
        # Reviewed-by: Bogus J. Simpson <bogus@simpson.com>
        # Reviewed-by: "Bogus J. Simpson" <bogus@simpson.com>
        # Reviewed-by: bogus@simpson.com
        #
        # Use it to extract the email address, because we don't care
        # about the display name.
        (name, addr) = email.utils.parseaddr(a)
        if DEBUG:
            print(f'A:{a}:NAME:{name}:ADDR:{addr}:')

        # Only trust the parser if the result looks like an address.
        # Depending on the Python version, parseaddr either returns
        # empty strings for input it considers malformed or returns
        # the first comma separated fragment ("Xu" in the example
        # below), which is not an email address.
        if '@' in addr:
            return addr

        # If email.utils fails to find anything, let's see if there's
        # a sequence of characters within angle brackets and hope that
        # is an email address.  This works around things like:
        #
        # Reported-by: Xu, Wen <wen.xu@gatech.edu>
        #
        # Which should have had the name in quotations because there's
        # a comma.
        m = self.r3.match(a)
        if m:
            addr = m.group(1)
            if DEBUG:
                print(f"M3:{addr}:M:{m}:")
            return addr

        # No idea, just spit the whole thing out and hope for the best.
        return a.strip()

    def run(self, lines):
        '''Return the sorted, de-duplicated addresses found in lines.

        lines is any iterable of strings, e.g. the output of "git log".
        Leading and trailing whitespace on each line is ignored, and
        lines that do not start with a known trailer tag are skipped.
        '''
        found = set()

        for line in lines:
            # First, does this line match any of the headers we
            # know about?
            m = self.r1.match(line.strip())
            if not m:
                continue
            rightside = m.group(2)

            n = self.r2.match(rightside)
            if n:
                # Break the line into an array of addresses,
                # delimited by commas, then handle each
                # address.
                addrs = rightside.split(',')
                if DEBUG:
                    print(f"0LINE:{rightside}:ADDRS:{addrs}:M:{n}")
            else:
                # Otherwise treat the line as a single email
                # address.
                addrs = [rightside]
                if DEBUG:
                    print(f"1LINE:{rightside}:M:{n}")

            for addr in addrs:
                a = self._handle_addr(addr)
                # A trailer that is only a comment, or a list with a
                # trailing comma, yields an empty string; that is not
                # a contributor.
                if a:
                    found.add(a)

        return sorted(found)

def main():
    '''Command line entry point.

    Parses the command line, runs "git log --pretty=medium <revspec>",
    and prints the contributor addresses found in the commit messages,
    joined by the --separator string.  Returns 0 on success; an empty
    revspec is rejected with a usage error.
    '''
    global DEBUG

    parser = argparse.ArgumentParser(description = "List email addresses of contributors to a series of git commits.")
    parser.add_argument("revspec", help = "git revisions to process.")
    parser.add_argument("--separator", type = str, default = '\n',
            help = "Separate each email address with this string.")
    parser.add_argument('--debug', action = 'store_true', default = False,
            help = argparse.SUPPRESS)
    args = parser.parse_args()

    if args.debug:
        DEBUG = True

    # revspec is a required positional, but it can still be passed as
    # an empty string.  Report that as a usage error.
    if not args.revspec:
        parser.error("revspec must not be empty")

    # read git commits from repo
    fd = find_developers()
    contributors = fd.run(backtick(['git', 'log', '--pretty=medium',
              args.revspec]))

    # run() already returns a sorted list.
    print(args.separator.join(contributors))
    return 0

# When executed as a script, use main()'s return value as the process
# exit status.
if __name__ == '__main__':
    raise SystemExit(main())