File: gitlog2changelog.py.in

package info (click to toggle)
modules 5.6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 23,036 kB
  • sloc: exp: 79,659; sh: 6,142; tcl: 5,900; makefile: 1,492; ansic: 474; python: 265; csh: 202; perl: 47; ruby: 44; lisp: 13
file content (316 lines) | stat: -rw-r--r-- 11,052 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#!@PYTHON@
# Copyright 2008 Marcus D. Hanwell <marcus@cryos.org>
# Minor changes for NUT by Charles Lepple
# Subsequent maintenance for NUT by Jim Klimov (since 2021)
# Distributed under the terms of the GNU General Public License v2 or later

import re
import os
from textwrap import TextWrapper
import sys
import subprocess

# Python 3 compatibility hack: Python 2 has a built-in `unicode` type while
# Python 3 does not (there `str` already holds Unicode text), so alias it.
# The built-in is probed by name only. No `import unicode` is attempted: a
# stray module of that name would rebind `unicode` to a module object, which
# is not callable and would make the probe below fail with a TypeError.
try:
    unicode('')
except NameError:
    #DEBUG# sys.stderr.write("Using 'str' as 'unicode'\n")
    #DEBUG# sys.stderr.flush()
    unicode = str

# unicodedata is only used for the optional ASCII folding of author names.
# A failed import is tolerated here; only ImportError is silenced so that
# interrupts and other unexpected errors still surface.
try:
    import unicodedata
except ImportError:
    pass

# Revision range handed to `git log`: everything reachable from HEAD, unless
# the first command-line argument names a starting point to count from.
if len(sys.argv) <= 1:
    rev_range = "HEAD"
else:
    base = sys.argv[1]
    rev_range = base + "..HEAD"

# Execute git log with the desired command line options.
# Support Python2 and Python3 (esp. 3.6 and earlier) semantics
# with regard to utf-8 content support (avoids ascii decoding in Py3).
#
# fin_mode records how the log is being read:
#   0 = not started yet
#   3 = subprocess.Popen() with explicit UTF-8 decoding; `fin` is a list
#       of lines without line terminators
#   2 = legacy os.popen() pipe; `fin` is a file-like object whose lines
#       still carry their trailing newline
fin_mode = 0
# Remove trailing end of line? splitlines() in py3 variant takes care of them
fin_chop = 0
try:
    p = subprocess.Popen(
        [
            "git",
            "log",
            "--pretty=medium",
            "--summary",
            "--stat",
            "--no-merges",
            "--date=short",
            ("%s" % rev_range),
        ],
        encoding="utf-8",
        close_fds=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
except TypeError:
    # This interpreter's Popen() does not accept the `encoding` argument.
    # Only the constructor call is guarded, so that a TypeError raised
    # while reading or splitting the output is not mistaken for that.
    p = None

if p is not None:
    fin, ferr = p.communicate()
    if p.returncode != 0:
        # Report on stderr and pass on git's own captured message, so the
        # cause of the failure is visible to whoever runs the build.
        sys.stderr.write("ERROR getting git changelog\n")
        if ferr:
            sys.stderr.write(ferr)
        sys.stderr.flush()
        sys.exit(1)
    fin = fin.splitlines()
    fin_mode = 3
else:
    # NOTE(review): rev_range comes from the command line and is pasted into
    # a shell command string here; callers must pass a trusted revision.
    fin = os.popen(
        "git log --pretty=medium --summary --stat --no-merges --date=short %s"
        % rev_range,
        "r",
    )
    fin_mode = 2
    fin_chop = 1

# Where the generated text goes: a file named "ChangeLog" in the current
# directory, unless the CHANGELOG_FILE environment variable says otherwise
# (e.g. a Makefile pointing into a builddir; the caller ensures it exists).
try:
    CHANGELOG_FILE = os.environ.get("CHANGELOG_FILE", "ChangeLog")
except Exception:
    CHANGELOG_FILE = "ChangeLog"

# A lone dash selects standard output instead of a file. An explicit output
# encoding is only requested when fin_mode is 3, i.e. when the git output
# itself was read with an explicit UTF-8 decoding.
if CHANGELOG_FILE != "-":
    if fin_mode != 3:
        fout = open(CHANGELOG_FILE, "w")
    else:
        fout = open(CHANGELOG_FILE, "w", encoding="UTF-8")
else:
    fout = sys.stdout

# By default we collect information from a commit and output it as soon as
# we have enough. Part of it is best-effort grouping of a series of commits
# made by the same author on the same day, if they follow each other.
# The alternative is to expend memory to collect all git log entries into a
# dictionary first (key = date+author, value = list of entries) and only
# print the output in the end of processing. This costs more resources, so
# is not default behavior.
# It is enabled by setting CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR to "true"
# (compared case-insensitively); an unset variable or any other value
# leaves it off.
tmpEnvVar = os.environ.get("CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR", None)
requireGroupByDateAuthor = str(tmpEnvVar).lower() == "true"

# Cache of entries keyed by the "date  author" heading; stays None when the
# grouping was not requested.
cachedContent = None
if requireGroupByDateAuthor:
    try:
        from collections import defaultdict
        cachedContent = defaultdict(list)
    except Exception as x:
        # Fall back to a plain dict and KEEP the grouping enabled: the code
        # filling the cache creates the list for a missing key itself, so a
        # plain dict is sufficient. The notice goes to stderr because stdout
        # may be the ChangeLog stream.
        sys.stderr.write("Failed to init requireGroupByDateAuthor processing as defaultdict(list), trying simple dict(): " + str(x) + "\n")
        cachedContent = dict()

# Parser state for the loop below: which parts of the current commit have
# been seen so far, plus the text accumulated for its message and file list.
authorFound = dateFound = messageFound = filesFound = False
messageNL = False
message = files = ""
# Heading ("date  author") of the entry written most recently.
prevAuthorLine = ""

# Author names are kept as is (legacy default) unless the environment
# variable WITH_PDF_NONASCII_TITLES is set to NO or FALSE (any letter case),
# in which case they must be reduced to ASCII. YES, TRUE or anything else
# keeps the default.
authorMustBeASCII_inverse_setting = str(os.environ.get("WITH_PDF_NONASCII_TITLES", "")).upper()
authorMustBeASCII = authorMustBeASCII_inverse_setting in ("NO", "FALSE")

# Entries are indented by two spaces and wrapped at 78 columns, without
# breaking words at hyphens.
# See also: https://github.com/python/cpython/blob/main/Lib/textwrap.py
wrapper = TextWrapper(
    width=78,
    initial_indent="  ",
    subsequent_indent="  ",
    break_on_hyphens=False,
)

# The main part of the loop.
# The git log output is walked line by line as a small state machine:
#  * a "commit" header resets everything collected so far;
#  * "Author:" and "Date:" lines provide the heading of the entry;
#  * following lines are accumulated into the message until it is ended;
#  * "name | stats" lines are collected into the list of files;
#  * the "N file(s) changed" summary line completes the commit, and the
#    entry is then written out (or cached when grouping is required).
# Diagnostics go to stderr, since stdout may be the ChangeLog itself
# (CHANGELOG_FILE set to "-").
for line in fin:
    # The commit line marks the start of a new commit object.
    if line.startswith("commit"):
        # Start all over again...
        authorFound = False
        dateFound = False
        messageFound = False
        messageNL = False
        message = ""
        filesFound = False
        files = ""
        continue
    # Match the author line and extract the part we want
    # (Don't use startswith to allow Author override inside commit message.)
    elif "Author:" in line:
        # maxsplit is passed by keyword on every Python version: this is
        # always accepted, while passing it positionally is deprecated
        # since Python 3.13.
        authorList = re.split(": ", line, maxsplit=1)

        try:
            author = authorList[1]
            # Drop the trailing newline if the input lines still have one
            author = author[0 : len(author) - fin_chop]
            if authorMustBeASCII:
                try:
                    if isinstance(author, str) and unicode != str:
                        # Python 2: a byte string must be decoded first
                        author = unicodedata.normalize(u'NFKD', unicode(author, "utf-8")).encode('ascii', 'ignore').decode('utf8')
                    else:
                        author = unicodedata.normalize(u'NFKD', author).encode('ascii', 'ignore').decode('utf8')
                except Exception as e:
                    # Best effort: keep the author name as it was
                    sys.stderr.write("Could not unicodedata.normalize() author '%s': %s\n" % (author, str(e)))
            authorFound = True
        except Exception:
            sys.stderr.write("Could not parse authorList = '%s'\n" % (line))

    # Match the date line
    elif line.startswith("Date:"):
        dateList = re.split(":   ", line, maxsplit=1)

        try:
            date = dateList[1]
            date = date[0 : len(date) - fin_chop]
            dateFound = True
        except Exception:
            sys.stderr.write("Could not parse dateList = '%s'\n" % (line))
    # The Fossil-IDs are ignored:
    elif line.startswith("    Fossil-ID:") or line.startswith("    [[SVN:"):
        continue
    # The svn-id lines are ignored
    elif "    git-svn-id:" in line:
        continue
    # The sign off line is ignored too
    elif "Signed-off-by" in line:
        continue
    # Extract the actual commit message for this commit
    elif authorFound and dateFound and messageFound is False:
        # Find the commit message if we can.
        # The first empty line seen is only remembered (messageNL); the
        # second one ends the message. A line of exactly three characters
        # (not counting the newline) ends it as well.
        # FIXME: Detect end of message by /^#/ to allow for longer essays
        #  in the detailed comments part?
        # FIXME: Some such comments include asciidoc-ish markup, notably
        #  bullet lists - do not concatenate those into one block but do
        #  actually pass them as sub-lists (indented, and perhaps not
        #  starting with an asterisk which we use for this document).
        if len(line) == fin_chop:
            if messageNL:
                messageFound = True
            else:
                messageNL = True
        elif len(line) == 3 + fin_chop:
            messageFound = True
        else:
            # Join the message lines into one space-separated paragraph
            if len(message) == 0:
                message = message + line.strip()
            else:
                message = message + " " + line.strip()
    # If this line is hit all of the files have been stored for this commit
    elif re.search("files? changed", line):
        filesFound = True
        continue
    # Collect the files for this commit. FIXME: Still need to add +/- to files
    elif authorFound and dateFound and messageFound:
        fileList = re.split(r' \| ', line, maxsplit=2)

        # Only lines which do contain the " | " separator name a file
        if len(fileList) > 1:
            if len(files) > 0:
                files = files + ", " + fileList[0].strip()
            else:
                files = fileList[0].strip()

    # All of the parts of the commit have been found - write out the entry
    if authorFound and dateFound and messageFound and filesFound:
        # First the author line, only outputted if it is the first for that
        # author on this day.
        # WARNING: In case of git rebase commit shuffling, merges of dormant
        # branches, etc. we are not guaranteed to have all dates in the list
        # nicely ordered. In fact, the same date+author can be repeated if
        # there were commits with other metadata in git history between those
        # (e.g. many PRs from a few authors merged during one day). While we
        # could cache each section by authorLine and only output in the end,
        # it can require a lot of memory - so by default we do not.
        authorLine = date + "  " + author
        if requireGroupByDateAuthor:
            # Also works if the cache is a plain dict
            if authorLine not in cachedContent:
                cachedContent[authorLine] = list()
        else:
            if len(prevAuthorLine) == 0:
                fout.write(authorLine + "\n\n")
            elif authorLine == prevAuthorLine:
                pass
            else:
                fout.write("\n" + authorLine + "\n\n")

        # Assemble the actual commit message line(s); the line length is
        # limited later by the text wrapper (configured for 78 columns).
        # Avoid printing same (or equivalent) filename lists twice, if commit
        # message starts with them.
        if message.startswith(files + ":"):
            commitLine = "* " + message
        else:
            # Compare the names before the first colon of the message with
            # the collected file names, regardless of their order
            namesF = None
            namesM = None
            try:
                namesM = sorted(re.split(r"[ ,]", message.split(":")[0]))
                namesF = sorted(re.split(r"[ ,]", files))
            except Exception:
                pass

            if namesM is not None and namesM == namesF:
                commitLine = "* " + message
            else:
                commitLine = "* " + files + ": " + message

        if requireGroupByDateAuthor:
            cachedContent[authorLine].append(commitLine)
        else:
            # Write out the commit line, wrapped for length
            fout.write(wrapper.fill(commitLine) + "\n")

        # Now reset all the variables ready for a new commit block.
        authorFound = False
        dateFound = False
        messageFound = False
        messageNL = False
        message = ""
        filesFound = False
        files = ""
        prevAuthorLine = authorLine

if requireGroupByDateAuthor:
    # Nothing was printed while parsing, so emit the whole cache now.
    # Headings are sorted in reverse alphanumeric order, which puts the
    # most recent date first.
    for counter, authorLine in enumerate(sorted(cachedContent, reverse=True)):
        heading = authorLine + "\n\n"
        if counter > 0:
            # Separate this section from the previous one by an empty line
            heading = "\n" + heading
        fout.write(heading)

        # Entries under a heading keep the order in which they were appended
        for commitLine in cachedContent[authorLine]:
            fout.write(wrapper.fill(commitLine) + "\n")

# Close the input and output lines now that we are finished.
git_status = None
if fin_mode == 3:
    # The exit status was already checked when the output was collected;
    # closing a pipe which is already closed has no effect.
    p.stdout.close()
    p.stderr.close()
else:
    # For an os.popen() pipe, close() reports the exit status of the
    # command (None when it succeeded).
    git_status = fin.close()

if fout is sys.stdout:
    # Standard output is not ours to close, just push out what we wrote
    fout.flush()
else:
    fout.close()

if git_status:
    # Same outcome as in the subprocess-based mode: a failed `git log`
    # must not look like a successfully generated ChangeLog.
    sys.stderr.write("ERROR getting git changelog\n")
    sys.exit(1)