File: help_parser.py

package info (click to toggle)
crazy-complete 0.3.6-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 2,404 kB
  • sloc: python: 7,949; sh: 4,636; makefile: 74
file content (332 lines) | stat: -rw-r--r-- 8,747 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
"""This module contains functions to parse the --help output of a program."""

import re
from collections import namedtuple


# Characters that end an option string; none of them can be part of an option
OPTION_BREAK_CHARS = [
    ' ', '\t', '\n',
    ',', '|',
    '=', '[',
]

# Characters that separate several spellings of one option (e.g. "-h, --help")
OPTION_DELIMITER_CHARS = [',', '|']

# A raw input line that could not be parsed as an option
Unparsed = namedtuple('Unparsed', 'text')

# One option string plus its metavar (None if absent) and whether the
# metavar was written in the optional form
OptionWithMetavar = namedtuple('OptionWithMetavar', 'option metavar optional')

# A list of OptionWithMetavar sharing one description (None if absent)
OptionsWithDescription = namedtuple('OptionsWithDescription', 'options description')


class CharStream:
    """A utility class for sequentially reading characters from a string.

    The stream exposes three plain attributes:
      - string: the text being read
      - len:    the length of that text (computed once in the constructor)
      - pos:    the index of the next character to be read

    Parsers backtrack by working on a copy() and assigning the copy's `pos`
    back to the original stream once a parse succeeded.
    """

    def __init__(self, string, pos = 0):
        self.string = string
        self.len = len(string)
        self.pos = pos

    def peek(self, relative_pos = 0):
        """
        Returns the character at the current position plus an optional relative offset
        without advancing the stream. Returns None if the position is out of bounds
        (before the start or past the end of the string).
        """
        index = self.pos + relative_pos
        # Explicit bounds check: plain indexing would let a negative index
        # silently wrap around and return a character from the end of the string.
        if 0 <= index < self.len:
            return self.string[index]
        return None

    def peek_str(self, length):
        """
        Returns a substring of the specified length starting from the current position
        without advancing the stream. The result is shorter than `length` (possibly
        empty) if fewer characters remain.
        """
        return self.string[self.pos:self.pos + length]

    def get(self):
        """Returns the current character and advances the position by one.

        Raises IndexError if the stream is already at its end; callers are
        expected to check is_end() or peek() first.
        """
        c = self.string[self.pos]
        self.pos += 1
        return c

    def is_space(self):
        """Checks if the current character is a space or a tab (False at the end)."""
        return self.peek() in (' ', '\t')

    def is_end(self):
        """Checks if the current position has reached the end of the string."""
        return self.pos >= self.len

    def copy(self):
        """Creates and returns a new CharStream object at the current position."""
        return CharStream(self.string, self.pos)

    def __repr__(self):
        # Show only the unread remainder of the current line (newline excluded)
        end = self.string.find('\n', self.pos)
        if end == -1:
            end = self.len
        line = self.string[self.pos:end]
        return f"CharStream({line!r})"


def eat_line(stream):
    '''Consume the rest of the current line and return it.

    The terminating newline character, if there is one, is consumed and
    included in the result. At the end of the stream an empty string is
    returned.
    '''
    chars = []
    while not stream.is_end():
        chars.append(stream.get())
        if chars[-1] == '\n':
            break
    return ''.join(chars)


def eat_space(stream):
    '''Consume a run of spaces and tabs and return the consumed text.

    Newlines are not consumed. Returns an empty string if the stream is not
    positioned on a space or tab.
    '''
    consumed = []
    while stream.is_space():
        consumed.append(stream.get())
    return ''.join(consumed)


def parse_option_string(stream):
    '''Read an option string (e.g. --help or -h) and return it.

    Leading spaces and tabs are skipped. The option must start with '-' and
    extends up to (not including) the first character listed in
    OPTION_BREAK_CHARS or the end of the stream.

    Returns None, leaving the stream untouched, if no option is found or if
    the text read is just '-' or '--'. On success the stream is advanced
    past the option.
    '''
    cursor = stream.copy()
    eat_space(cursor)

    if cursor.peek() != '-':
        return None

    chars = []
    while not (cursor.is_end() or cursor.peek() in OPTION_BREAK_CHARS):
        chars.append(cursor.get())
    option = ''.join(chars)

    # A lone '-' or '--' is not an option
    if option in ('-', '--'):
        return None

    # Commit: only now is the caller's stream moved
    stream.pos = cursor.pos
    return option


def parse_bracket(stream):
    '''Read and return a bracketed expression, including both brackets.

    Recognized forms:
        <foo bar>
        [foo bar]
        (foo bar)
        {foo bar}

    Returns None (consuming nothing) if the stream is not positioned on an
    opening bracket. Reading stops at the first matching closing character;
    nesting is not tracked. If no closing character is found, everything up
    to the end of the stream is consumed and returned.
    '''
    closing_for = {'<': '>', '[': ']', '(': ')', '{': '}'}
    closing = closing_for.get(stream.peek())
    if closing is None:
        return None

    pieces = [stream.get()]  # the opening bracket
    while not stream.is_end():
        pieces.append(stream.get())
        if pieces[-1] == closing:
            break

    return ''.join(pieces)


def parse_quoted_string(stream):
    '''Read and return a quoted string, including both quote characters.

    Recognized forms:
        'foo bar'
        "foo bar"

    Returns None (consuming nothing) if the stream is not positioned on a
    quote. Escape sequences are not interpreted, since they are unlikely to
    appear in the help text of an option: the string ends at the next
    occurrence of the opening quote character. If there is none, everything
    up to the end of the stream is consumed and returned.
    '''
    quote = stream.peek()
    if quote != '"' and quote != "'":
        return None

    pieces = [stream.get()]  # the opening quote
    while not stream.is_end():
        pieces.append(stream.get())
        if pieces[-1] == quote:
            break

    return ''.join(pieces)


def parse_metavar(stream):
    '''Read and return a metavar (possibly an empty string).

    A metavar runs until a space, tab, newline, an option delimiter
    (see OPTION_DELIMITER_CHARS) or the end of the stream.

    Bracketed expressions (opened by `<`, `[`, `(` or `{`) and quoted
    strings (`'` or `"`) are read as a unit, so whitespace and delimiters
    inside them do not end the metavar.

    Examples of metavars:
        foo_bar
        'foo bar'
        "foo bar"
        <foo bar>
    '''
    parts = []

    while not stream.is_end():
        char = stream.peek()
        if char in (' ', '\t', '\n'):
            break

        if char in ('<', '[', '(', '{'):
            parts.append(parse_bracket(stream))
        elif char in ('"', "'"):
            parts.append(parse_quoted_string(stream))
        elif char in OPTION_DELIMITER_CHARS:
            break
        else:
            parts.append(stream.get())

    return ''.join(parts)


def parse_trailing_description_line(stream):
    '''Read and return a continuation line of a description.

    A line is a continuation line if:
      - it starts with a space or tab, and
      - its first non-blank character is not a hyphen ('-'), unless the
        line is indented by at least 10 characters. A hyphen at a smaller
        indent is treated as the start of a new option.

    The leading whitespace is dropped from the returned text; the trailing
    newline (if any) is kept. Returns None and leaves the stream untouched
    if the line does not qualify.
    '''
    probe = stream.copy()

    if not probe.is_space():
        return None

    indent = eat_space(probe)

    starts_new_option = probe.peek() == '-' and len(indent) < 10
    if starts_new_option:
        return None

    line = eat_line(probe)
    stream.pos = probe.pos
    return line


def parse_description(stream):
    '''Read and return the description of an option.

    Leading spaces and tabs are skipped, then the rest of the current line
    is taken, followed by every continuation line accepted by
    parse_trailing_description_line(). Reading stops at the first line that
    is rejected or comes back empty.
    '''
    eat_space(stream)
    lines = [eat_line(stream)]

    line = parse_trailing_description_line(stream)
    while line:
        lines.append(line)
        line = parse_trailing_description_line(stream)

    return ''.join(lines)


def parse_option_with_metavar(stream):
    '''Read an option together with its metavar (if any).

    Accepted forms:
      --option=METAVAR
      --option[=METAVAR] ('optional' is set to True; the metavar is whatever
                          parse_metavar() reads starting at the '[')
      --option METAVAR

    Not treated as a metavar:
      --option  METAVAR (two whitespace characters: a description follows)
      --option, ...     (an option delimiter follows)

    Returns an OptionWithMetavar, or None if no option string could be read.
    '''
    opt = parse_option_string(stream)
    if not opt:
        return None

    if stream.peek_str(2) == '[=':
        return OptionWithMetavar(opt, parse_metavar(stream), True)

    if stream.peek() == '=':
        stream.get()
        return OptionWithMetavar(opt, parse_metavar(stream), False)

    # Two whitespace characters after --option mean the description follows
    if stream.peek_str(2).isspace():
        return OptionWithMetavar(opt, None, False)

    # An option delimiter cannot be a metavar. Probe on a copy so the
    # delimiter itself stays in the stream for the caller.
    if parse_option_delimiter(stream.copy()):
        return OptionWithMetavar(opt, None, False)

    # A single space or tab: what follows is the metavar
    if not stream.is_end() and stream.is_space():
        stream.get()
        return OptionWithMetavar(opt, parse_metavar(stream), False)

    return OptionWithMetavar(opt, None, False)


def parse_option_delimiter(stream):
    '''Parse an option delimiter and return True if it was found, False otherwise.

    Leading spaces and tabs are skipped. If the next character is one of
    OPTION_DELIMITER_CHARS, it is consumed and the stream is advanced past
    it. Otherwise the stream is left untouched.

    At the end of the input this returns False instead of raising, so help
    text whose last line is an option without a trailing newline can be
    parsed.
    '''
    p = stream.copy()
    eat_space(p)

    # peek() yields None at the end of the stream, whereas get() would raise
    # IndexError there.
    if p.peek() not in OPTION_DELIMITER_CHARS:
        return False

    p.get()
    stream.pos = p.pos
    return True


def parse_options_with_description(stream):
    '''Parse one or more delimited options followed by an optional description.

    Options are read with parse_option_with_metavar() for as long as each
    one is followed by an option delimiter. If the stream then stands on a
    space, tab or newline, the description is read with parse_description().

    Returns an OptionsWithDescription (description is None if there is
    none), or None if no option could be read.
    '''
    options = []

    while not stream.is_end():
        option = parse_option_with_metavar(stream)
        if not option:
            break
        options.append(option)

        if not parse_option_delimiter(stream):
            break

    if not options:
        return None

    description = None
    if stream.peek() in (' ', '\t', '\n'):
        description = parse_description(stream)

    return OptionsWithDescription(options, description)


def parse(stream):
    """Parse the whole stream into a list of results.

    Each element is either an OptionsWithDescription (for text recognized by
    parse_options_with_description) or an Unparsed wrapping one raw line
    that could not be recognized.
    """
    results = []

    while not stream.is_end():
        parsed = parse_options_with_description(stream)
        if not parsed:
            # Not an option line: keep the raw line so nothing is lost
            parsed = Unparsed(eat_line(stream))
        results.append(parsed)

    return results


def get_program_name_from_help(string):
    """Extracts the program name from the help string.

    If the text begins (after optional leading spaces, tabs and newlines)
    with "usage:" in any letter case, the first word after it is returned.
    Otherwise the first whitespace-separated word of the text is returned.

    Raises IndexError if the text is empty or contains only whitespace.
    """
    # Leading blanks are skipped explicitly: without that, help text that
    # starts with an empty line or an indent would fall through to the
    # fallback below and yield the literal word "Usage:".
    m = re.match(r'[\n\t ]*usage:[\n\t ]+([^\n\t ]+)', string, re.IGNORECASE)
    if m:
        return m[1]
    return string.split()[0]