File: contributors.py

package info (click to toggle)
scap-security-guide 0.1.76-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 110,644 kB
  • sloc: xml: 241,883; sh: 73,777; python: 32,527; makefile: 27
file content (145 lines) | stat: -rw-r--r-- 5,047 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""
Common functions for processing Contributors in SSG
"""

from __future__ import absolute_import
from __future__ import print_function

import collections
import datetime
import re
import os.path

from .shims import subprocess_check_output


# Banner placed at the top of the generated contributors files. It names the
# generating script (the base name of this module's file) and carries a UTC
# timestamp taken once, when this module is imported.
MANUAL_EDIT_WARNING = (
    "This file is generated using the %s script. DO NOT MANUALLY EDIT!!!!\n"
    "Last Modified: %s UTC\n"
) % (
    os.path.basename(__file__),
    datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M"),
)

# Author emails whose entries are dropped when contributions are collected.
# Matching is done on the exact string, so each address is listed verbatim.
ignored_emails = (
    # No idea / ignore
    "lyd@chippy.(none)",
    "nick@null.net",
    "root@localhost.localdomain",
    "root@rhel6.(none)",
    "root@ip-10-0-8-36.ec2.internal",
    "badger@gitter.im",
    "46447321+allcontributors[bot]@users.noreply.github.com",
    "konflux@no-reply.konflux-ci.dev",
    "126015336+red-hat-konflux[bot]@users.noreply.github.com",
)


def _get_contributions_by_canonical_email(output):
    """
    Groups the author entries found in the given output by email address.

    Args:
        output (str): Text holding one entry per line. Each entry is expected to look like
                      "<commits_count> <author_name> <email>", with the email enclosed
                      in angle brackets.

    Returns:
        collections.defaultdict: Maps each email address to a list of tuples. Each tuple holds
                                 the number of commits (int) and the author's name (str) taken
                                 from one matching line.

    Notes:
        - Lines that do not match the expected format are skipped.
        - Entries whose email is listed in the `ignored_emails` tuple are skipped.
    """
    # Leading whitespace, the commit count, the author name, then the email
    # between angle brackets.
    entry_pattern = re.compile(r"[\s]*([0-9]+)\s+(.+)\s+\<(.+)\>")

    grouped = collections.defaultdict(list)
    for entry in output.split("\n"):
        parsed = entry_pattern.match(entry)
        if parsed is not None:
            count_text, name, address = parsed.groups()
            if address not in ignored_emails:
                grouped[address].append((int(count_text), name))
    return grouped


def _get_name_used_most_in_contributions(contribution_sets):
    """
    Picks the name belonging to the largest (count, name) tuple.

    The tuples are ordered by their natural comparison, so the entry with the
    highest count wins; when counts are equal, the name that compares greatest
    is chosen.

    Args:
        contribution_sets (list of tuples): Non-empty collection of (count, name) pairs.

    Returns:
        str: The name from the highest-ranked pair.
    """
    ranked = list(contribution_sets)
    ranked.sort(reverse=True)
    _, top_name = ranked[0]
    return top_name


def _get_contributor_email_mapping(contributions_by_email):
    """
    Builds a lookup from contributor name to email address.

    For every email address, the name is chosen by
    `_get_name_used_most_in_contributions` from that address's contributions.
    If several addresses resolve to the same name, the address visited last
    during iteration is the one that is kept.

    Args:
        contributions_by_email (dict): Maps email addresses to the list of contributions
                                       recorded for each address.

    Returns:
        dict: Maps each chosen contributor name to an email address.
    """
    return {
        _get_name_used_most_in_contributions(contributions_by_email[address]): address
        for address in contributions_by_email
    }


def _names_sorted_by_last_name(names):
    """
    Returns the given names ordered by last name, ascending and case-insensitively.

    Each name is split on single spaces, its parts are upper-cased and compared
    from the last part backwards, so earlier parts only break ties between
    names that share the same trailing parts.

    Args:
        names (list of str): The names to order.

    Returns:
        list of str: A new list with the names sorted by last name.
    """
    def _last_part_first(full_name):
        parts = [part.upper() for part in full_name.split(" ")]
        parts.reverse()
        return tuple(parts)

    return sorted(names, key=_last_part_first)


def generate():
    """
    Generates a list of contributors in both Markdown and XML formats.

    This function retrieves the list of contributors from the 'git shortlog -se' command,
    groups the contributions by email, and maps them to contributor names.
    It then formats the contributors, ordered by last name, into Markdown and XML strings.
    Both strings start with a comment carrying MANUAL_EDIT_WARNING.

    In the XML output the contributor name and email are XML-escaped, so names
    containing '&', '<' or '>' still produce well-formed markup.

    Returns:
        tuple: A tuple containing two strings:
            - contributors_md (str): The contributors list in Markdown format.
            - contributors_xml (str): The contributors list in XML format.
    """
    # Imported here so this function does not rely on a module-level import.
    from xml.sax.saxutils import escape

    output = subprocess_check_output(["git", "shortlog", "-se"]).decode("utf-8")
    contributions_by_email = _get_contributions_by_canonical_email(output)
    contributors = _get_contributor_email_mapping(contributions_by_email)

    # Collect the pieces and join once at the end instead of repeatedly
    # growing a string.
    md_parts = [
        "<!---%s--->\n\n" % MANUAL_EDIT_WARNING,
        "The following people have contributed to the SCAP Security Guide project\n",
        "(listed in alphabetical order):\n\n",
    ]
    xml_parts = [
        "<!--%s-->\n\n" % MANUAL_EDIT_WARNING,
        "<text>\n",
    ]

    for name in _names_sorted_by_last_name(list(contributors)):
        email = contributors[name]
        md_parts.append("* %s <%s>\n" % (name, email))
        # Author names and emails come from the git history and may contain
        # XML metacharacters; escape them before embedding in the element.
        xml_parts.append(
            "<contributor>%s &lt;%s&gt;</contributor>\n" % (escape(name), escape(email))
        )

    xml_parts.append("</text>\n")

    contributors_md = "".join(md_parts)
    contributors_xml = "".join(xml_parts)
    return contributors_md, contributors_xml