1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
|
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
"""
Parse CHANGES.md into a JSON structure.
Run with two arguments: the .md file to parse, and the JSON file to write:
python parse_relnotes.py CHANGES.md relnotes.json
Every section that has something that looks like a version number in it will
be recorded as the release notes for that version.
"""
import json
import re
import sys
class TextChunkBuffer:
"""Hold onto text chunks until needed."""
def __init__(self):
self.buffer = []
def append(self, text):
"""Add `text` to the buffer."""
self.buffer.append(text)
def clear(self):
"""Clear the buffer."""
self.buffer = []
def flush(self):
"""Produce a ("text", text) tuple if there's anything here."""
buffered = "".join(self.buffer).strip()
if buffered:
yield ("text", buffered)
self.clear()
def parse_md(lines):
"""Parse markdown lines, producing (type, text) chunks."""
buffer = TextChunkBuffer()
for line in lines:
if header_match := re.search(r"^(#+) (.+)$", line):
yield from buffer.flush()
hashes, text = header_match.groups()
yield (f"h{len(hashes)}", text)
else:
buffer.append(line)
yield from buffer.flush()
def sections(parsed_data):
"""Convert a stream of parsed tokens into sections with text and notes.
Yields a stream of:
('h-level', 'header text', 'text')
"""
header = None
text = []
for ttype, ttext in parsed_data:
if ttype.startswith('h'):
if header:
yield (*header, "\n".join(text))
text = []
header = (ttype, ttext)
elif ttype == "text":
text.append(ttext)
else:
raise RuntimeError(f"Don't know ttype {ttype!r}")
yield (*header, "\n".join(text))
def refind(regex, text):
"""Find a regex in some text, and return the matched text, or None."""
if m := re.search(regex, text):
return m.group()
else:
return None
def fix_ref_links(text, version):
"""Find links to .rst files, and make them full RTFD links."""
def new_link(m):
return f"](https://coverage.readthedocs.io/en/{version}/{m[1]}.html{m[2]})"
return re.sub(r"\]\((\w+)\.rst(#.*?)\)", new_link, text)
def relnotes(mdlines):
r"""Yield (version, text) pairs from markdown lines.
Each tuple is a separate version mentioned in the release notes.
A version is any section with \d\.\d in the header text.
"""
for _, htext, text in sections(parse_md(mdlines)):
version = refind(r"\d+\.\d[^ ]*", htext)
if version:
prerelease = any(c in version for c in "abc")
when = refind(r"\d+-\d+-\d+", htext)
text = fix_ref_links(text, version)
yield {
"version": version,
"text": text,
"prerelease": prerelease,
"when": when,
}
def parse(md_filename, json_filename):
"""Main function: parse markdown and write JSON."""
with open(md_filename) as mf:
markdown = mf.read()
with open(json_filename, "w") as jf:
json.dump(list(relnotes(markdown.splitlines(True))), jf, indent=4)
if __name__ == "__main__":
parse(*sys.argv[1:3])
|