1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
|
#!/usr/bin/env python3
import json
import os
import re
import sys
from dataclasses import dataclass
from pathlib import Path
# Names exported by a star-import of this module.
# NOTE(review): ``get_docs_section`` is not defined in the visible part of
# this file -- confirm it exists, otherwise a star-import will fail.
__all__ = ["get_docs_section"]
# Base URL of the published documentation; section links are built from it.
DOCS_ROOT = "https://gnome.pages.gitlab.gnome.org/blueprint-compiler"
# Registry of parsed sections keyed by section name; filled in by
# load_reference_docs().
sections: dict[str, "Section"] = {}
@dataclass
class Section:
    """One labelled section of the reference documentation."""

    # URL of the rendered HTML page, including the section anchor.
    link: str
    # Raw rST lines belonging to the section, in file order. This is a list of
    # lines (as passed to rst_to_md), not a single string.
    lines: list[str]

    def to_json(self) -> dict[str, str]:
        """Return a JSON-serializable dict for this section.

        The dict has two keys: ``"content"`` (the section body converted to
        Markdown by ``rst_to_md``) and ``"link"`` (the section URL).
        """
        return {
            "content": rst_to_md(self.lines),
            "link": self.link,
        }
def load_reference_docs():
    """Fill the module-level ``sections`` registry from ``reference/*.rst``.

    Every ``*.rst`` file in the ``reference`` directory next to this script is
    scanned for label lines of the form ``.. _Name:``. The lines following a
    label, up to the next label or the end of the file, become the body of the
    section called ``Name``. Lines that precede the first label of a file are
    discarded. If the same section name occurs more than once, the last one
    processed wins.
    """

    def store_section(html_file: str, name, body: list[str]) -> None:
        # Nothing to store until the first label of a file has been seen.
        if not name:
            return
        anchor = re.sub(r"[^a-z0-9]+", "-", name.lower())
        link = f"{DOCS_ROOT}/reference/{html_file}#{anchor}"
        sections[name] = Section(link, body)

    reference_dir = Path(os.path.dirname(__file__), "reference")
    # Sorted so that the outcome does not depend on directory listing order.
    for filename in sorted(reference_dir.glob("*.rst")):
        html_file = re.sub(r"\.rst$", ".html", filename.name)
        section_name = None
        lines: list[str] = []
        # Explicit encoding: the default would otherwise depend on the locale.
        with open(filename, encoding="utf-8") as f:
            for line in f:
                if m := re.match(r"\.\.\s+_(.*):", line):
                    # A new label ends the section collected so far.
                    store_section(html_file, section_name, lines)
                    section_name = m.group(1)
                    lines = []
                else:
                    lines.append(line)
        # Flush the last section of the file.
        store_section(html_file, section_name, lines)
# This isn't a comprehensive rST to Markdown converter; it only needs to handle
# the small subset of rST used in the reference docs.
def rst_to_md(lines: list[str]) -> str:
    """Convert the rST lines of one section to Markdown.

    Handled constructs:

    * ``.. rst-class:: grammar-block`` -> fenced ``text`` block, with
      ``:ref:`` roles reduced to their link text
    * ``.. code-block:: blueprint`` -> fenced ``blueprint`` block
    * ``.. note::`` -> a ``#### Note`` heading followed by the note body
    * ``.. image:: path`` -> a Markdown image
    * a line followed by an underline made only of ``-``, ``~`` or ``+`` ->
      a heading; the first underline character encountered maps to ``##``
      and each new character goes one level deeper
    * inline literals, ``:ref:`` roles and external hyperlinks

    Args:
        lines: The raw lines of the section; trailing whitespace (including
            newlines) is ignored.

    Returns:
        The Markdown text.

    Raises:
        KeyError: If a ``:ref:`` role outside a grammar block points at a
            name that is not in ``sections``.
    """
    # Collected output fragments, joined once at the end.
    out: list[str] = []
    # Maps an underline character to the Markdown heading depth assigned to it.
    heading_levels: dict[str, int] = {}
    i = 0
    n = len(lines)

    def rst_to_md_inline(line: str) -> str:
        # ``literal`` -> `literal`
        line = re.sub(r"``(.*?)``", r"`\1`", line)
        # :ref:`text<target>` -> [text](link of the target section)
        line = re.sub(
            r":ref:`(.*?)<(.*?)>`",
            lambda m: f"[{m.group(1)}]({sections[m.group(2)].link})",
            line,
        )
        # `text <url>`_ -> [text](url)
        line = re.sub(r"`([^`]*?) <([^`>]*?)>`_", r"[\1](\2)", line)
        return line

    def print_block(lang: str = "", code: bool = True, strip_links: bool = False):
        # Consume the indented (or blank) lines following a directive and emit
        # them either as a fenced code block or as converted inline text.
        nonlocal i
        block: list[str] = []
        while i < n:
            line = lines[i].rstrip()
            if line.startswith(" "):
                # Drop at most three columns of indentation. Only spaces are
                # removed, so a shallower indent never loses content.
                line = re.sub(r"^ {1,3}", "", line)
            elif line != "":
                # First non-blank, non-indented line ends the block.
                break
            if strip_links:
                line = re.sub(r":ref:`(.*?)<(.*?)>`", r"\1", line)
            if not code:
                line = rst_to_md_inline(line)
            block.append(line + "\n")
            i += 1
        text = "".join(block)
        if code:
            out.append(f"```{lang}\n{text.strip()}\n```\n\n")
        else:
            out.append(text)

    while i < n:
        line = lines[i].rstrip()
        i += 1
        if line == ".. rst-class:: grammar-block":
            print_block("text", strip_links=True)
        elif line == ".. code-block:: blueprint":
            print_block("blueprint")
        elif line == ".. note::":
            out.append("#### Note\n")
            print_block(code=False)
        elif m := re.match(r"\.\. image:: (.*)", line):
            # NOTE(review): this branch did not parse in the reviewed source (a
            # bare "}" inside the f-string), so the image markup below is a
            # reconstruction. It assumes the rendered docs serve images by
            # file name from `_images/` under DOCS_ROOT -- confirm.
            image = m.group(1).rsplit("/", 1)[-1]
            out.append(f"![{image}]({DOCS_ROOT}/_images/{image})\n")
        elif i < n and re.match(r"^((-+)|(~+)|(\++))$", lines[i]):
            # The next line is an underline, so the current line is a heading.
            level_char = lines[i][0]
            if level_char not in heading_levels:
                heading_levels[level_char] = (
                    max(heading_levels.values(), default=1) + 1
                )
            out.append(
                "#" * heading_levels[level_char] + " " + rst_to_md_inline(line) + "\n"
            )
            # Skip the underline itself.
            i += 1
        else:
            out.append(rst_to_md_inline(line) + "\n")
    return "".join(out)
if __name__ == "__main__":
    # Exactly one positional argument is expected: the path of the JSON file
    # to write.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: collect_sections.py <output_file>")
        sys.exit(1)
    outfile = cli_args[0]

    load_reference_docs()

    # Serialize every collected section, keyed by section name.
    with open(outfile, "w") as f:
        serialized = {name: section.to_json() for name, section in sections.items()}
        json.dump(serialized, f, indent=2, sort_keys=True)
|