#!/usr/bin/env python
##
## Name: mkdoc.py
## Purpose: Extract documentation from header files.
##
## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
##
## Usage: mkdoc.py <template> <output>
##
from __future__ import print_function
import collections, re, sys
# A regular expression to match commented declarations.
# This is specific to C and not very general; it should work fine for the imath
# headers but will not adapt well to arbitrary code or to C++.
doc = re.compile(r'''(?mx)/\*\* # open /**
(?P<text>(?:[^*]|\*[^/])*) # text Does a thing
\*/\n # close */
(?P<decl>[^;{]*(?:;$|\{))''') # decl void f(x);
# A regular expression matching up to 4 spaces at the head of a line.
# Used to strip leading comment indentation in Decl.
spc = re.compile(r'(?m)^ {1,4}')
# A regular expression matching an insertion point. An insertion point has the
# form {{insert "header" name ...}}. If no names are given, all the names in
# the given header are inserted.
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')
# A regular expression matching non-identifier characters, for splitting.
nid = re.compile(r'\W+')
# A cache of already-parsed files, maps filename to declarations.
CACHE = {}
def last_word(s):
    """Returns the last identifier-shaped word in s."""
    words = nid.split(s.strip())
    return words[-1]
def typeset(text):
    """Renders text with verbatim sections into markdown.

    Indented lines are wrapped in ``` fences, and the whole comment is
    turned into a markdown list item.
    """
    out = []
    in_fence = False
    for raw in text.split('\n'):
        verbatim = raw.startswith(' ')
        # Open or close a code fence whenever we cross between plain and
        # indented (verbatim) text.
        if in_fence != verbatim:
            out.append('```')
            in_fence = verbatim
        out.append(raw)
    if in_fence:
        out.append('```')  # close a fence left open at end of text
    # Prefix the first line as a list bullet and indent the rest under it;
    # blank lines are left untouched.
    rendered = [' - ' + out[0]]
    for rest in out[1:]:
        rendered.append(' ' + rest if rest else rest)
    return '\n'.join(rendered)
class LIndex(object):
    """Represents a line offset index for text."""

    def __init__(self, text):
        """Initialize the index from text.

        Params:
          text: the string to index
        """
        pos = 0
        # An array of ending offsets for each line, with a sentinel at position
        # 0 to make the index arithmetic easier.
        idx = [0]
        # Scan forward for newlines or EOF, and push the offsets of the line
        # breaks onto the list so we can binary search them later.
        while pos < len(text):
            # Renamed from 'next' to avoid shadowing the builtin.
            brk = text.find('\n', pos)
            if brk < 0:
                break
            idx.append(brk)
            pos = brk + 1
        # If the text does not end in a newline, the final partial line ends
        # at EOF.
        if idx[-1] < len(text):
            idx.append(len(text))
        self._len = len(text)
        self._index = idx

    def linecol(self, pos):
        """Returns the (line, col) corresponding to pos.

        Line numbers are 1-based, columns are 0-based.

        Raises:
          IndexError: if pos is outside the indexed text.
        """
        if pos < 0 or pos > self._len:
            raise IndexError("position %d out of range" % pos)
        # Binary search for the largest line number whose end marker is at or
        # after pos and whose previous line's end is before pos.
        idx = self._index
        i, j = 1, len(idx)
        while i < j:
            # BUG FIX: use floor division. Under Python 3 the original
            # '(i + j) / 2' yields a float, which is not a valid list index.
            m = (i + j) // 2
            if idx[m] < pos:
                i = m + 1
            elif idx[m - 1] < pos:
                return m, pos - idx[m - 1]
            else:
                j = m
        # This happens if (and only if) the whole file is one line.
        return 1, pos
class Decl(object):
    """Represents a single documented declaration."""

    def __init__(self, com, decl, line=None):
        """Initialize a new documented declaration.

        Params:
          com: the raw text of the comment
          decl: the raw text of the declaration
          line: the line number of the declaration
        """
        # The name is the last identifier before the parameter list, or the
        # last identifier overall for non-function declarations.
        lp = decl.find('(')
        head = decl.rstrip(';') if lp < 0 else decl[:lp]
        self.name = last_word(head)
        # Collapse all interior whitespace in the declaration to single
        # spaces, dropping the trailing ';' or '{'.
        self.decl = ' '.join(decl.rstrip(';{').split())
        # Strip the common leading indentation from the comment body.
        self.comment = spc.sub('', com.rstrip())
        self.line = line

    def __repr__(self):
        return '#Decl["%s"]' % self.decl

    def markdown(self, path):
        """Renders this declaration as markdown, wrapping the name in a
        link to its line in path."""
        at = self.decl.index(self.name)
        linked = '%s<a href="%s#L%d">%s</a>%s' % (
            self.decl[:at],
            path,
            self.line,
            self.name,
            self.decl[at + len(self.name):],
        )
        return '''------------
<a id="{name}"></a><pre>
{decl};
</pre>
{comment}
'''.format(name=self.name, decl=linked, comment=typeset(self.comment))
def parse_decls(text):
    """Parse a dictionary of declarations from text.

    Returns an OrderedDict mapping each declaration's name to its Decl,
    in order of first appearance.
    """
    index = LIndex(text)
    found = collections.OrderedDict()
    for m in doc.finditer(text):
        # Record the line where the declaration (not the comment) begins.
        line, _ = index.linecol(m.span('decl')[0])
        entry = Decl(m.group('text'), m.group('decl'), line)
        found[entry.name] = entry
    return found
def load_file(path):
    """Load declarations from path, or use cached results.

    Params:
      path: the header file to parse

    Returns:
      A dict mapping declaration names to Decl objects.
    """
    if path not in CACHE:
        # BUG FIX: the 'file' builtin and the 'U' mode flag are Python 2
        # only ('U' was removed in Python 3.11). open(path, 'r') behaves the
        # same on both: universal newlines are the default in Python 3.
        with open(path, 'r') as fp:
            CACHE[path] = parse_decls(fp.read())
    return CACHE[path]
def main(args):
    """Entry point: expand insertion points in a template.

    Params:
      args: command-line arguments, [template_path, output_path]
    """
    if len(args) != 2:
        print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
        sys.exit(1)
    doc_template = args[0]
    doc_markdown = args[1]

    # BUG FIX: the 'file' builtin and the 'U' mode flag are Python 2 only;
    # open(path, 'r') works identically on Python 2 and 3. The local was
    # also renamed from 'input' to avoid shadowing the builtin.
    with open(doc_template, 'r') as fp:
        template = fp.read()

    with open(doc_markdown, 'wt') as output:
        print(
            '''<!--
This file was generated from "{0}" by mkdoc.py
DO NOT EDIT
-->
'''.format(doc_template),
            file=output)
        pos = 0  # last position of input copied
        # Look for substitution markers in the template, and replace them with
        # their content.
        for ip in ins.finditer(template):
            output.write(template[pos:ip.start()])
            pos = ip.end()
            decls = load_file(ip.group('file'))
            if ip.group('names'):  # pick the selected names, in order
                decls = collections.OrderedDict(
                    (key, decls[key])
                    for key in ip.group('names').strip().split())
            # Render the selected declarations.
            for decl in decls.values():
                print(decl.markdown(ip.group('file')), file=output)
        # Clean up any remaining template bits
        output.write(template[pos:])