1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
|
#!/usr/bin/env python3
# encoding: utf8
"""
Extracts information from a manpage (read from stdin) that can be useful to
create modules for the code generator.
Example usage:
man 3posix chmod | extract_from_man
Or, in a loop:
rm -f gen.mod;
for f in chmod chown chdir; do
man 3posix $f | extract_from_man >> gen.mod;
done
"""
import sys
import re
def wrap(s, cols, indent=1):
    """Wraps the words of s into backslash-continued lines.

    s: text to wrap; it is split on whitespace, so runs of blanks and
        newlines in the input are not preserved.
    cols: maximum length of the text on each line.  The trailing ' \\'
        continuation marker and the leading tabs are not counted.
    indent: number of tab characters put in front of every continuation
        line.

    Returns the wrapped string.  Every line but the last ends with ' \\'
    followed by a newline.  A single word longer than cols is never split;
    it gets a line of its own.
    """
    continuation = ' \\\n' + '\t' * indent

    lines = []
    line = ''
    for w in s.split():
        if not line:
            # Nothing to flush yet: breaking here would only emit an empty
            # continuation line in front of an over-long first word.
            line = w
        elif len(line) + 1 + len(w) > cols:
            lines.append(line)
            line = w
        else:
            line += ' ' + w
    lines.append(line)

    return continuation.join(lines).rstrip()
def extract_sections(f):
    """Splits a manpage into sections.

    f: iterable of text lines (for example an open file).

    Blank lines are dropped.  A line starting with a space or a tab is
    body text of the current section; any other line starts a new section
    named after the stripped line.  Returns a dictionary mapping section
    names to their body text.  The text seen before the first header is
    stored under the '' key, which is therefore always present.
    """
    result = {}
    current = ''
    body = []

    for raw in f:
        if raw.strip() == '':
            continue

        if not raw.startswith((' ', '\t')):
            # Header line: close the section in progress, open a new one.
            result[current] = ''.join(body)
            current = raw.strip()
            body = []
            continue

        body.append(raw)

    result[current] = ''.join(body)
    return result
def get_ret_on_error(sections):
    """Tries to find out what the function returns on error.

    sections: dictionary mapping section names to their text.

    Returns None if there is no 'RETURN VALUE' section.  Otherwise returns
    a list with the distinct candidate values (strings such as '-1' or
    'NULL'), in the order in which the patterns below found them; the list
    is empty when no pattern matched.
    """
    if 'RETURN VALUE' not in sections:
        return None

    # Collapse every run of whitespace (newlines included) into one space
    # so the patterns do not have to care about line wrapping.
    s = ' '.join(sections['RETURN VALUE'].split())

    # Note: the '(-|‐)' regexp matches both the normal minus sign ('-')
    # and the UTF-8 hyphen sign ('‐', or \xe2\x80\x90); sadly both usually
    # look the same
    regexps = [
        r'On error,? (?P<ev>[-\w]+) is returned',
        r'On error,? .* returns? (?P<ev>[-\w]+).',
        r'some error occurs,? (?P<ev>[-\w]+) is returned',
        r'and (?P<ev>[-\w]+) if an error occurr(s|ed)',
        r'[Oo]ther((-|‐) )?wise, (?P<ev>[-\w]+) shall be returned',
        r'Other((-|‐) )?wise, the functions shall return (?P<ev>[-\w]+) and'
    ]

    possible_errors = []
    for regexp in regexps:
        m = re.search(regexp, s)
        if not m:
            continue
        ev = m.group('ev')
        # Several patterns often match the same sentence or value; report
        # each candidate only once.
        if ev not in possible_errors:
            possible_errors.append(ev)

    return possible_errors
def get_possible_errnos(sections):
    """Tries to find out the possible valid errno values after the
    function has failed.

    sections: dictionary mapping section names to their text.

    Returns None if there is no 'ERRORS' section.  Otherwise returns a
    list with the names found at the beginning of the indented lines of
    that section, in order of appearance; duplicates are not removed.
    """
    if 'ERRORS' not in sections:
        return None

    # A name is an upper-case letter followed by at least two upper-case
    # letters or digits; the digits are needed for names such as E2BIG.
    name_re = re.compile(r'[A-Z][A-Z0-9]{2,}')
    # An indented line that begins with one or more names, separated by
    # optional commas and blanks.
    line_re = re.compile(r'\s+(?P<e>([A-Z][A-Z0-9]{2,},? *)+)\s*')

    errnos = []
    for l in sections['ERRORS'].split('\n'):
        m = line_re.match(l)
        if m:
            # Pull out the individual names instead of splitting on ',',
            # so a trailing comma does not yield an empty entry and
            # space-separated names are not glued together.
            errnos.extend(name_re.findall(m.group('e')))

    return errnos
def get_defs(sections):
    """Tries to find out the includes and function definitions.

    sections: dictionary mapping section names to their text.

    Returns None if there is no 'SYNOPSIS' section.  Otherwise returns a
    tuple (includes, funcs): includes is the list of what follows each
    '#include' (for example '<sys/stat.h>'), and funcs is the list of
    function prototypes, each one joined into a single string.
    """
    if 'SYNOPSIS' not in sections:
        return None

    includes = []
    funcs = []

    # An indented line that looks like (a piece of) a prototype and ends
    # in ',' (to be continued on the next line) or ';' (complete).
    fre = re.compile(r'\s+(?P<f>[\w,\*\s]+\(?(\w|,|\*|\s|\.\.\.)*\)?[,;])$')

    for l in sections['SYNOPSIS'].split('\n'):
        sl = l.strip()
        if sl.startswith('#include'):
            # Take whatever follows the directive, regardless of how it is
            # separated from it (spaces, tabs or nothing at all); a bare
            # '#include' has nothing worth recording.
            header = sl[len('#include'):].strip()
            if header:
                includes.append(header)

        m = fre.match(l.rstrip())
        if m:
            f = m.group('f')

            # long functions are split in multiple lines, this
            # tries to detect that and append to the last seen
            # function
            if funcs and not funcs[-1].endswith(';'):
                funcs[-1] += ' ' + f
            else:
                funcs.append(f)

    return (includes, funcs)
if __name__ == '__main__':
    # The manpage is always read from stdin, so any argument is a usage
    # error: show the help text and bail out.
    if len(sys.argv) > 1:
        print(__doc__)
        sys.exit(1)

    s = extract_sections(sys.stdin)
    on_error = get_ret_on_error(s)
    errnos = get_possible_errnos(s)

    # get_defs() returns None when the input has no SYNOPSIS section;
    # report that instead of failing on the tuple unpacking below.
    defs = get_defs(s)
    if defs is None:
        print('error: no SYNOPSIS section found in the input',
              file=sys.stderr)
        sys.exit(1)
    incs, funcs = defs

    print('\n'.join('include: ' + i for i in incs))
    print()
    print('\n'.join(funcs))

    if on_error:
        print('\ton error:', ' || '.join(on_error))

    if errnos:
        # Sorted and de-duplicated, wrapped with backslash continuations.
        print('\tvalid errnos:', wrap(' '.join(sorted(set(errnos))),
                                      60, indent=2))
|