1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
#!/usr/bin/env python3
#
# Generate the AUTHORS file combining existing AUTHORS file with
# git commit log.
#
# Usage: generate_authors.py AUTHORS.src
# Copyright 2022 Moshe Kaplan
# Based on generate_authors.pl by Michael Mann
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
import argparse
import re
import subprocess
# Name sorting prefers pyuca's Collator. When pyuca is not installed, a
# notice is printed to stderr and the locale module is imported instead.
# have_pyuca records which of the two is available to the rest of the file.
try:
    from pyuca import Collator
except ModuleNotFoundError:
    have_pyuca = False
    import sys
    sys.stderr.write('pyuca module not found. Sorting names using the built-in locale module.\n')
    import locale
else:
    have_pyuca = True
def get_git_authors():
    '''
    Collect the authors recorded in the git history reachable from HEAD.

    Runs "git shortlog" and parses every line of its output.

    Sample line:
    # 4321 Navin R. Johnson <nrjohnson@example.com>

    Returns a list of (name, email) tuples sorted by name. Every '@' in an
    email is replaced with '[AT]'. Names are ordered with pyuca's Collator
    when have_pyuca is true, otherwise with locale.strxfrm on the
    casefolded name.

    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    '''
    git_line_regex = re.compile(r"^\s*\d+\s+([^<]*)\s*<([^>]*)>")
    cmd = ["git", "--no-pager", "shortlog", "--email", "--summary", "HEAD"]
    # encoding= makes the output decode as UTF-8 regardless of the locale.
    # Note that this keyword requires Python 3.6 or newer.
    git_cmd_output = subprocess.check_output(cmd, universal_newlines=True, encoding='utf-8')

    git_authors = []
    for line in git_cmd_output.splitlines():
        match = git_line_regex.match(line.strip())
        if match is None:
            # Not a "<count> <name> <email>" line; there is nothing to record,
            # and calling .group() on None would raise AttributeError.
            continue
        name = match.group(1).strip()
        # Try to lower how much spam people get:
        email = match.group(2).strip().replace('@', '[AT]')
        git_authors.append((name, email))

    if have_pyuca:
        collator = Collator()
        return sorted(git_authors, key=lambda author: collator.sort_key(author[0]))
    return sorted(git_authors, key=lambda author: locale.strxfrm(author[0].casefold()))
def extract_contributors(authors_content):
    '''
    Extract names and email addresses from the AUTHORS file Contributors
    section.

    authors_content is the text of the AUTHORS file. Only the text after the
    first "= Contributors =" marker is examined; if the marker is absent an
    empty list is returned.

    A line containing '{' opens a bracketed block: that line itself is
    parsed for a contributor, and the following lines are skipped up to and
    including the first one containing '}'. Outside a bracketed block every
    line is parsed for a contributor.

    Returns a list of (name, email) tuples in file order.
    '''
    _, marker, contributors_content = authors_content.partition("= Contributors =")
    if not marker:
        # No Contributors section, so there is nothing to extract.
        return []

    # NOTE(review): the nested quantifiers in the name part can backtrack
    # heavily on a long run of name characters not followed by " <...>".
    contributor_line_regex = re.compile(
        r"^([\w\.\-\'\x80-\xff]+(\s*[\w+\.\-\'\x80-\xff])*)\s+<([^>]*)>")

    contributors = []
    in_bracket = False
    for line in contributors_content.splitlines():
        if '{' in line:
            # Header line of a bracketed block; the header is still parsed.
            in_bracket = True
        elif in_bracket:
            # Inside the block: only look for the closing bracket.
            if '}' in line:
                in_bracket = False
            continue

        # The pattern requires "<...>", so lines without an address
        # (including "e-mail address removed" entries) never match.
        contributor_match = contributor_line_regex.match(line)
        if contributor_match:
            contributors.append((contributor_match.group(1), contributor_match.group(3)))
    return contributors
def generate_git_contributors_text(contributors_emails, git_authors_emails):
    '''
    Build the text listing git authors that are not already contributors.

    contributors_emails and git_authors_emails are iterables of
    (name, email) tuples. A git author is skipped when the email, compared
    case-insensitively, belongs to a listed contributor, was already emitted
    for an earlier git author, or is Gerald's address (he is part of the
    AUTHORS header).

    Names shorter than 24 characters are padded with tabs so the address
    starts at the third 8-column tab stop; longer names are followed by a
    single space.

    Returns the lines joined with newlines, without a trailing newline.
    '''
    tab_width = 8
    name_column_tabs = 3

    # Track the email addresses seen to avoid including the same email address twice
    emails_addresses_seen = {email.lower() for _, email in contributors_emails}

    output_lines = []
    for name, email in git_authors_emails:
        email_key = email.lower()
        if email_key in emails_addresses_seen:
            continue
        # Skip Gerald, since he's part of the header. Compared in lower case
        # so the check agrees with the case-insensitive de-duplication.
        if email_key == "gerald[at]wireshark.org":
            continue

        # Each full 8 characters of the name uses up one tab stop.
        tabs_needed = name_column_tabs - len(name) // tab_width
        separator = '\t' * tabs_needed if tabs_needed > 0 else ' '
        output_lines.append("{name}{sep}<{email}>".format(name=name, sep=separator, email=email))
        emails_addresses_seen.add(email_key)
    return "\n".join(output_lines)
def read_authors(parsed_args):
    '''
    Return the start of the AUTHORS file, up to the git log section.

    The file named by parsed_args.authors[0] is read as UTF-8. Reading stops
    at the first line containing "= From git log ="; that line and
    everything after it are left out of the returned string.
    '''
    authors_path = parsed_args.authors[0]
    kept_lines = []
    with open(authors_path, 'r', encoding='utf-8') as authors_file:
        for text_line in authors_file:
            # Read authors file until we find gitlog entries, then stop
            if '= From git log =' in text_line:
                break
            kept_lines.append(text_line)
    return ''.join(kept_lines)
def main():
    '''
    Regenerate the "From git log" section of the AUTHORS file in place.

    The path to the AUTHORS file is taken from the command line. The part of
    the file before the git log section is kept, and a new git log section
    is appended that lists git authors not already named as contributors.
    The file is rewritten only after all of the content has been gathered.
    '''
    parser = argparse.ArgumentParser(description="Generate the AUTHORS file combining existing AUTHORS file with git commit log.")
    parser.add_argument("authors", metavar='authors', nargs=1, help="path to AUTHORS file")
    parsed_args = parser.parse_args()
    authors_path = parsed_args.authors[0]

    preserved_text = read_authors(parsed_args)

    # The contributors already listed are collected first so that the git
    # section does not repeat them.
    listed_contributors = extract_contributors(preserved_text)
    authors_from_git = get_git_authors()
    git_section = generate_git_contributors_text(listed_contributors, authors_from_git)

    # Now we can write our output:
    pieces = [preserved_text, '= From git log =\n\n', git_section, '\n']
    with open(authors_path, 'w', encoding='utf-8') as fh:
        fh.write(''.join(pieces))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|