1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
|
"""This module contains functions to parse the --help output of a program."""
import re
from collections import namedtuple
# Characters that terminate an option string; they are never part of an option name
OPTION_BREAK_CHARS = [' ', '\t', '\n', ',', '|', '=', '[']
# Characters that separate alternative spellings of an option (e.g. "-h, --help")
OPTION_DELIMITER_CHARS = [',', '|']
# A line of help text that could not be parsed as an option; `text` is the raw line
Unparsed = namedtuple('Unparsed', ['text'])
# One option spelling: `option` is the option string (e.g. "--help"), `metavar` is
# its argument placeholder or None, and `optional` is True for the
# "--option[=METAVAR]" form
OptionWithMetavar = namedtuple('OptionWithMetavar', ['option', 'metavar', 'optional'])
# A group of option spellings sharing one description: `options` is a list of
# OptionWithMetavar, `description` is the description text or None
OptionsWithDescription = namedtuple('OptionsWithDescription', ['options', 'description'])
class CharStream:
    """A utility class for sequentially reading characters from a string.

    Attributes:
        string: The text being read.
        len: Length of ``string``, computed once at construction.
        pos: Index of the next character to be read.
    """

    def __init__(self, string, pos=0):
        self.string = string
        self.len = len(string)
        self.pos = pos

    def peek(self, relative_pos=0):
        """
        Returns the character at the current position plus an optional relative offset
        without advancing the stream. Returns None if the position is out of bounds.
        """
        index = self.pos + relative_pos
        # Bounds are checked explicitly: a negative index would otherwise wrap
        # around to the end of the string instead of being reported as
        # out of bounds.
        if 0 <= index < len(self.string):
            return self.string[index]
        return None

    def peek_str(self, length):
        """
        Returns a substring of the specified length starting from the current position
        without advancing the stream. The result is shorter than ``length`` when
        fewer characters remain.
        """
        return self.string[self.pos:self.pos + length]

    def get(self):
        """Returns the current character and advances the position by one.

        Raises IndexError when the stream is already at its end.
        """
        c = self.string[self.pos]
        self.pos += 1
        return c

    def is_space(self):
        """Checks if the current character is a space or a tab."""
        return self.peek() in (' ', '\t')

    def is_end(self):
        """Checks if the current position has reached the end of the string."""
        return self.pos >= self.len

    def copy(self):
        """Creates and returns a new CharStream object at the current position."""
        return CharStream(self.string, self.pos)

    def __repr__(self):
        # Show only the rest of the current line so the repr stays short.
        end = self.string.find('\n', self.pos)
        if end == -1:
            end = self.len
        line = self.string[self.pos:end]
        return f"CharStream({line!r})"
def eat_line(stream):
    '''Consume the rest of the current line and return it.

    The terminating newline, if there is one, is consumed and included in the
    result. An empty string is returned when the stream is already at its end.
    '''
    pieces = []
    while not stream.is_end():
        ch = stream.get()
        pieces.append(ch)
        if ch == '\n':
            break
    return ''.join(pieces)
def eat_space(stream):
    '''Consume the run of spaces and tabs at the current position and return it.

    Newlines are not consumed. The result is empty when the current character
    is not a space or tab.
    '''
    consumed = []
    while stream.is_space():
        consumed.append(stream.get())
    return ''.join(consumed)
def parse_option_string(stream):
    '''Read an option string such as --help or -h and return it.

    Leading spaces and tabs are skipped. The option must start with '-' and
    extends up to (not including) the first character in OPTION_BREAK_CHARS.

    Returns None, leaving the stream where it was, if no option starts here
    or if the text read is just '-' or '--'.
    '''
    cursor = stream.copy()
    eat_space(cursor)
    if cursor.peek() != '-':
        return None

    chars = []
    while not (cursor.is_end() or cursor.peek() in OPTION_BREAK_CHARS):
        chars.append(cursor.get())
    option = ''.join(chars)

    if option == '-' or option == '--':
        return None

    # Only a successful parse moves the caller's stream forward.
    stream.pos = cursor.pos
    return option
def parse_bracket(stream):
    '''Read a bracketed expression and return it including both brackets.

    Recognised forms:
        <foo bar>
        [foo bar]
        (foo bar)
        {foo bar}

    Returns None without consuming anything if the current character is not
    an opening bracket. Reading stops at the first matching closing character
    (nesting is not tracked) or at the end of the stream.
    '''
    opener = stream.peek()
    closing = {'<': '>', '[': ']', '(': ')', '{': '}'}.get(opener)
    if closing is None:
        return None

    pieces = [stream.get()]
    while not stream.is_end():
        ch = stream.get()
        pieces.append(ch)
        if ch == closing:
            break
    return ''.join(pieces)
def parse_quoted_string(stream):
    '''Read a quoted string and return it including both quote characters.

    Recognised forms:
        'foo bar'
        "foo bar"

    Escape sequences are not interpreted, since they are unlikely to appear in
    the description string of an option. Returns None without consuming
    anything if the current character is not a quote. Reading stops at the
    next occurrence of the opening quote character or at the end of the stream.
    '''
    quote = stream.peek()
    if quote != '"' and quote != "'":
        return None

    pieces = [stream.get()]
    while not stream.is_end():
        ch = stream.get()
        pieces.append(ch)
        if ch == quote:
            break
    return ''.join(pieces)
def parse_metavar(stream):
    '''Read a metavar and return it (possibly as an empty string).

    A metavar runs until a space, tab, newline, an option delimiter
    (see OPTION_DELIMITER_CHARS) or the end of the stream. Bracketed
    expressions and quoted strings inside it are read as a unit, so the
    whitespace they contain does not end the metavar.

    Examples of metavars:
        foo_bar
        'foo bar'
        "foo bar"
        <foo bar>
    '''
    parts = []
    while not stream.is_end():
        nxt = stream.peek()
        if nxt in (' ', '\t', '\n'):
            break
        if nxt in ('<', '[', '(', '{'):
            parts.append(parse_bracket(stream))
        elif nxt in ('"', "'"):
            parts.append(parse_quoted_string(stream))
        elif nxt in OPTION_DELIMITER_CHARS:
            break
        else:
            parts.append(stream.get())
    return ''.join(parts)
def parse_trailing_description_line(stream):
    '''Read a continuation line of a description and return it.

    A line counts as a continuation when:
    - it starts with a space or tab, and
    - the first character after that indentation is not a hyphen ('-'), which
      would indicate a new option, unless the indentation is at least 10
      characters wide.

    Returns the line including its newline (it may be empty when only
    whitespace remains before the end of the input), or None, leaving the
    stream untouched, when the line is not a continuation.
    '''
    lookahead = stream.copy()
    if not lookahead.is_space():
        return None

    indent = eat_space(lookahead)
    starts_with_hyphen = lookahead.peek() == '-'
    if starts_with_hyphen and len(indent) < 10:
        return None

    line = eat_line(lookahead)
    stream.pos = lookahead.pos
    return line
def parse_description(stream):
    '''Read the description of an option and return it.

    Leading spaces and tabs are skipped, the rest of the current line is read,
    and every following trailing description line is appended to it.
    '''
    eat_space(stream)
    parts = [eat_line(stream)]
    line = parse_trailing_description_line(stream)
    while line:
        parts.append(line)
        line = parse_trailing_description_line(stream)
    return ''.join(parts)
def parse_option_with_metavar(stream):
    '''Read an option together with its metavar (if any).

    Accepted forms:
        --option=METAVAR
        --option[=METAVAR]   ('optional' is True; the metavar keeps its brackets)
        --option METAVAR

    Not treated as a metavar:
        --option  METAVAR    (two whitespace characters: a description follows)
        --option, ...        (an option delimiter follows)

    Returns an OptionWithMetavar, or None if no option starts at the current
    position.
    '''
    opt = parse_option_string(stream)
    if not opt:
        return None

    if stream.peek_str(2) == '[=':
        return OptionWithMetavar(opt, parse_metavar(stream), True)

    if stream.peek() == '=':
        stream.get()
        return OptionWithMetavar(opt, parse_metavar(stream), False)

    # Two whitespace characters after --option mean the description follows.
    if stream.peek_str(2).isspace():
        return OptionWithMetavar(opt, None, False)

    # An option delimiter cannot be a metavar; probe on a copy so that the
    # delimiter itself is left for the caller to consume.
    if parse_option_delimiter(stream.copy()):
        return OptionWithMetavar(opt, None, False)

    if not stream.is_end() and stream.is_space():
        stream.get()
        return OptionWithMetavar(opt, parse_metavar(stream), False)

    return OptionWithMetavar(opt, None, False)
def parse_option_delimiter(stream):
    '''Consume an option delimiter if one comes next.

    Leading spaces and tabs are skipped. The stream is advanced past the
    delimiter only when one is found; otherwise it is left untouched.

    Returns True if a delimiter (see OPTION_DELIMITER_CHARS) was consumed,
    False otherwise, including when nothing but spaces and tabs remain
    before the end of the input.
    '''
    p = stream.copy()
    eat_space(p)
    # peek() yields None at the end of the input, whereas get() would raise
    # IndexError for help text that ends right after an option.
    if p.peek() in OPTION_DELIMITER_CHARS:
        p.get()
        stream.pos = p.pos
        return True
    return False
def parse_options_with_description(stream):
    '''Parse one or more delimiter-separated options and their description.

    Returns an OptionsWithDescription, or None if no option starts at the
    current position. The description is None unless the options are followed
    by a space, tab or newline.
    '''
    parsed = []
    while not stream.is_end():
        current = parse_option_with_metavar(stream)
        if not current:
            break
        parsed.append(current)
        # Keep reading options only while they are chained by a delimiter.
        if not parse_option_delimiter(stream):
            break

    if not parsed:
        return None

    description = None
    if stream.peek() in (' ', '\t', '\n'):
        description = parse_description(stream)
    return OptionsWithDescription(parsed, description)
def parse(stream):
    """Parse the whole stream into a list of results.

    Each element is either an OptionsWithDescription or, for a line where no
    option could be parsed, an Unparsed holding that line.
    """
    results = []
    while not stream.is_end():
        entry = parse_options_with_description(stream)
        if not entry:
            # Not an option line: keep the raw text so nothing is lost.
            entry = Unparsed(eat_line(stream))
        results.append(entry)
    return results
def get_program_name_from_help(string):
    """Extracts the program name from the help string.

    If the text starts (ignoring leading whitespace and letter case) with
    "usage:", the word following it is returned. Otherwise the first
    whitespace-separated word of the text is returned.

    Returns None when the text is empty or contains only whitespace.
    """
    # Leading whitespace is tolerated so that help text starting with a blank
    # line does not yield "usage:" itself as the program name.
    m = re.match(r'\s*usage:[\n\t ]+([^\n\t ]+)', string, re.I)
    if m:
        return m[1]
    words = string.split()
    return words[0] if words else None
|