File: stringtools.py

package info (click to toggle)
brian 2.9.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,872 kB
  • sloc: python: 51,820; cpp: 2,033; makefile: 108; sh: 72
file content (313 lines) | stat: -rw-r--r-- 9,164 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
"""
A collection of tools for string formatting tasks.
"""

import re
import string

# Public names of this module, i.e. what ``from <module> import *`` exports.
__all__ = [
    "indent",
    "deindent",
    "word_substitute",
    "replace",
    "get_identifiers",
    "strip_empty_lines",
    "stripped_deindented_lines",
    "strip_empty_leading_and_trailing_lines",
    "code_representation",
    "SpellChecker",
]


def indent(text, numtabs=1, spacespertab=4, tab=None):
    """
    Prefix every line of a (multiline) string with an indentation.

    The indentation consists of ``numtabs`` repetitions of a single "tab".
    Unless ``tab`` is given, one tab is ``spacespertab`` space characters.
    To indent with real tab characters (or any other string), pass the tab
    explicitly, e.g.::

        indent(text, tab='\\t')

    In that case ``spacespertab`` is ignored.

    Parameters
    ----------
    text : str
        The string to indent; lines are separated by ``\\n``.
    numtabs : int, optional
        How many tabs to put in front of each line. Defaults to 1.
    spacespertab : int, optional
        Number of spaces making up one tab when ``tab`` is not given.
        Defaults to 4.
    tab : str, optional
        The string to use as a single tab instead of spaces.

    Returns
    -------
    indented : str
        The text with the indentation prepended to every line.

    Examples
    --------
    >>> multiline = '''def f(x):
    ...     return x*x'''
    >>> print(multiline)
    def f(x):
        return x*x
    >>> print(indent(multiline))
        def f(x):
            return x*x
    >>> print(indent(multiline, numtabs=2))
            def f(x):
                return x*x
    >>> print(indent(multiline, spacespertab=2))
      def f(x):
          return x*x
    >>> print(indent(multiline, tab='####'))
    ####def f(x):
    ####    return x*x
    """
    single_tab = " " * spacespertab if tab is None else tab
    prefix = single_tab * numtabs
    # Prefixing each "\n"-separated piece is the same as prefixing the text
    # and inserting the prefix after every newline.
    return "\n".join(prefix + line for line in text.split("\n"))


def deindent(text, numtabs=None, spacespertab=4, docstring=False):
    """
    Return a copy of ``text`` with leading indentation stripped.

    Every tab character in ``text`` is first replaced by ``spacespertab``
    spaces. By default, the indentation shared by all non-blank lines is
    removed. If ``numtabs`` is given, exactly ``numtabs * spacespertab``
    leading characters are cut from each line instead.

    With ``docstring=True`` the first line is left untouched (it is assumed
    to be correctly placed already) and only the remaining lines are
    inspected and deindented.

    Parameters
    ----------
    text : str
        The string to deindent; lines are separated by ``\\n``.
    numtabs : int, optional
        Explicit number of tabs to remove. If ``None`` (the default), the
        common indentation is determined from the text.
    spacespertab : int, optional
        Width of a tab in spaces. Defaults to 4.
    docstring : bool, optional
        Whether to skip the first line. Defaults to ``False``.

    Returns
    -------
    deindented : str
        The deindented text.

    Examples
    --------
    Normal strings, e.g. function definitions:

    >>> multiline = '''    def f(x):
    ...          return x**2'''
    >>> print(multiline)
        def f(x):
             return x**2
    >>> print(deindent(multiline))
    def f(x):
         return x**2
    >>> print(deindent(multiline, docstring=True))
        def f(x):
    return x**2
    >>> print(deindent(multiline, numtabs=1, spacespertab=2))
      def f(x):
           return x**2

    Docstrings:

    >>> docstring = '''First docstring line.
    ...     This line determines the indentation.'''
    >>> print(docstring)
    First docstring line.
        This line determines the indentation.
    >>> print(deindent(docstring, docstring=True))
    First docstring line.
    This line determines the indentation.
    """
    expanded = text.replace("\t", " " * spacespertab)
    rows = expanded.split("\n")
    # For docstrings the first line is excluded from both the search for the
    # common indentation and the stripping itself.
    first = 1 if docstring else 0
    if docstring and len(rows) < 2:
        # A one-line docstring has nothing to deindent
        return expanded
    if numtabs is None:
        # Smallest leading-whitespace width over all non-blank lines
        margins = (len(row) - len(row.lstrip()) for row in rows[first:] if row.strip())
        cut = min(margins, default=0)
    else:
        cut = numtabs * spacespertab
    untouched = rows[:first]
    stripped = [row[cut:] for row in rows[first:]]
    return "\n".join(untouched + stripped)


def word_substitute(expr, substitutions):
    """
    Applies a dict of word substitutions.

    The dict ``substitutions`` consists of pairs ``(word, rep)`` where each
    word ``word`` appearing in ``expr`` is replaced by ``rep``. Here a 'word'
    means anything matching the regexp ``\\bword\\b``, with ``word`` taken
    literally (regular expression metacharacters in it have no special
    meaning). The replacement is inserted verbatim as well, i.e. backslashes
    in ``rep`` are not interpreted as escapes or group references.

    The substitutions are applied one after the other, in the iteration
    order of ``substitutions``.

    Parameters
    ----------
    expr : str
        The string in which to substitute.
    substitutions : dict
        Mapping from words to their replacements. Replacements are converted
        to strings with `str`.

    Returns
    -------
    substituted : str
        The string with all substitutions applied.

    Examples
    --------

    >>> expr = 'a*_b+c5+8+f(A)'
    >>> print(word_substitute(expr, {'a':'banana', 'f':'func'}))
    banana*_b+c5+8+func(A)
    """
    for word, replacement in substitutions.items():
        # Escape the word so that characters such as "." or "[" only match
        # themselves instead of acting as regular expression syntax.
        pattern = rf"\b{re.escape(str(word))}\b"
        # A callable replacement is inserted as-is; a plain string would be
        # processed as a template ("\1", "\g<name>", "\n", ...).
        expr = re.sub(pattern, lambda _match, _text=str(replacement): _text, expr)
    return expr


def replace(s, substitutions):
    """
    Apply a dictionary of plain substring substitutions to ``s``.

    In contrast to `word_substitute`, no attempt is made to restrict the
    replacement to whole words: every occurrence of a key is replaced by its
    value via `str.replace`. The substitutions are applied one after the
    other, in the iteration order of ``substitutions``.

    Parameters
    ----------
    s : str
        The string in which to substitute.
    substitutions : dict
        Mapping from the substrings to replace to their replacements.

    Returns
    -------
    replaced : str
        The string with all substitutions applied.
    """
    result = s
    for old, new in substitutions.items():
        result = result.replace(old, new)
    return result


# Words that match the identifier regexp used in `get_identifiers` but are
# removed from its result.
KEYWORDS = {"and", "or", "not", "True", "False"}


def get_identifiers(expr, include_numbers=False):
    """
    Return all the identifiers in a given string ``expr``, that is everything
    that matches a programming language variable like expression, which is
    here implemented as the regexp ``\\b[A-Za-z_][A-Za-z0-9_]*\\b``. The words
    in ``KEYWORDS`` are never returned.

    Parameters
    ----------
    expr : str
        The string to analyze
    include_numbers : bool, optional
        Whether to include number literals in the output. Defaults to ``False``.
        Numbers are returned as they are written in ``expr``, without a
        leading sign. Digits that are part of an identifier (e.g. the ``10``
        in ``x10``) are not considered to be numbers.

    Returns
    -------
    identifiers : set
        A set of all the identifiers (and, optionally, numbers) in `expr`.

    Examples
    --------
    >>> expr = '3-a*_b+c5+8+f(A - .3e-10, tau_2)*17'
    >>> ids = get_identifiers(expr)
    >>> print(sorted(list(ids)))
    ['A', '_b', 'a', 'c5', 'f', 'tau_2']
    >>> ids = get_identifiers(expr, include_numbers=True)
    >>> print(sorted(list(ids)))
    ['.3e-10', '17', '3', '8', 'A', '_b', 'a', 'c5', 'f', 'tau_2']
    """
    identifiers = set(re.findall(r"\b[A-Za-z_][A-Za-z0-9_]*\b", expr))
    identifiers -= KEYWORDS
    if not include_numbers:
        return identifiers
    # Only the number itself, not a preceding + or -. The negative lookbehind
    # rejects a match that starts directly after a letter, digit or
    # underscore, so that the tail of an identifier such as "x10" or
    # "tau_22" is not mistaken for a number. It also succeeds at the very
    # start of the string, so no separate "^" alternative is needed.
    numbers = set(
        re.findall(
            r"(?<![A-Za-z0-9_])[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?",
            expr,
        )
    )
    return identifiers | numbers


def strip_empty_lines(s):
    """
    Drop every empty line from the multi-line string `s`.

    A line counts as empty if it contains nothing but whitespace.

    Parameters
    ----------
    s : str
        The string to process; lines are separated by ``\\n``.

    Returns
    -------
    stripped : str
        The remaining lines, joined by ``\\n``.

    Examples
    --------

    >>> multiline = '''A string with
    ...
    ... an empty line.'''
    >>> print(strip_empty_lines(multiline))
    A string with
    an empty line.
    """
    kept = []
    for line in s.split("\n"):
        if line.strip():
            kept.append(line)
    return "\n".join(kept)


def strip_empty_leading_and_trailing_lines(s):
    """
    Drop the empty lines at the start and at the end of the multi-line
    string `s`.

    A line counts as empty if it contains nothing but whitespace. Empty
    lines between non-empty lines are kept.

    Parameters
    ----------
    s : str
        The string to process; lines are separated by ``\\n``.

    Returns
    -------
    stripped : str
        The lines from the first to the last non-empty line, joined by
        ``\\n`` (an empty string if there is no non-empty line).
    """
    rows = s.split("\n")
    begin, end = 0, len(rows)
    # Move the start forward past blank lines ...
    while begin < end and not rows[begin].strip():
        begin += 1
    # ... and the end backward, never crossing the start.
    while end > begin and not rows[end - 1].strip():
        end -= 1
    return "\n".join(rows[begin:end])


def stripped_deindented_lines(code):
    """
    Split a multi-line string into its deindented, non-empty lines.

    The string is first passed through `deindent` (with its default
    arguments) and `strip_empty_lines`, and then split at ``\\n``.

    Parameters
    ----------
    code : str
        The multi-line string to process.

    Returns
    -------
    lines : list of str
        The resulting lines.
    """
    return strip_empty_lines(deindent(code)).split("\n")


def code_representation(code):
    """
    Build a string representation for code given in one of several formats.

    Supported formats:

    - A single string
    - A list (or tuple) of statements/strings
    - A dict of strings
    - A dict of lists (or tuples) of statements/strings

    Anything else is converted with `str` and treated as a single string.
    Sequences are joined into one line per element. For dictionaries, each
    entry is shown as a ``Key <key>:`` header followed by the indented code.
    Empty leading and trailing lines are removed from the result.

    Parameters
    ----------
    code : str, list, tuple, dict or object
        The code to represent.

    Returns
    -------
    representation : str
        The string representation of the code.
    """
    if not isinstance(code, (str, list, tuple, dict)):
        code = str(code)
    if isinstance(code, str):
        return strip_empty_leading_and_trailing_lines(code)

    # Normalize to a dict; a bare sequence is stored under the key ``None``.
    # Dicts are copied so that the caller's object is not modified below.
    if isinstance(code, dict):
        blocks = code.copy()
    else:
        blocks = {None: code}

    # Values are only reassigned for existing keys, which is safe while
    # iterating over the dict.
    for key in blocks:
        statements = blocks[key]
        if isinstance(statements, (list, tuple)):
            blocks[key] = "\n".join(str(line) for line in statements)

    # A single entry under the key ``None`` is shown without a header
    if len(blocks) == 1 and next(iter(blocks)) is None:
        return strip_empty_leading_and_trailing_lines(next(iter(blocks.values())))

    sections = [f"Key {key}:\n" + indent(str(text)) for key, text in blocks.items()]
    return strip_empty_leading_and_trailing_lines("\n".join(sections))


# The below is adapted from Peter Norvig's spelling corrector
# http://norvig.com/spell.py (MIT licensed)
class SpellChecker:
    """
    A simple spell checker that will be used to suggest the correct name if the
    user made a typo (e.g. for state variable names).

    Parameters
    ----------
    words : iterable of str
        The known words. They are copied into a set when the spell checker is
        created, so any iterable (including a generator) can be used.
    alphabet : iterable of str, optional
        The allowed characters, used when generating candidate words by
        replacing or inserting a character. Defaults to lowercase ascii
        letters, digits and the underscore.
    """

    def __init__(self, words, alphabet=f"{string.ascii_lowercase + string.digits}_"):
        # A set makes the many membership tests done by `suggest` cheap, and
        # it avoids exhausting a one-shot iterator on the first lookup.
        self.words = set(words)
        self.alphabet = alphabet

    def edits1(self, word):
        """
        Return the set of all strings that are one edit away from ``word``.

        An edit is the deletion of a character, the transposition of two
        adjacent characters, the replacement of a character by a character
        of the alphabet, or the insertion of a character of the alphabet.
        """
        # All ways of cutting the word into a head and a tail
        splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
        deletes = [head + tail[1:] for head, tail in splits if tail]
        transposes = [
            head + tail[1] + tail[0] + tail[2:]
            for head, tail in splits
            if len(tail) > 1
        ]
        replaces = [
            head + char + tail[1:]
            for head, tail in splits
            for char in self.alphabet
            if tail
        ]
        inserts = [head + char + tail for head, tail in splits for char in self.alphabet]
        return set(deletes + transposes + replaces + inserts)

    def known_edits2(self, word):
        """
        Return the set of known words that are two edits away from ``word``.
        """
        return {
            e2 for e1 in self.edits1(word) for e2 in self.edits1(e1) if e2 in self.words
        }

    def known(self, words):
        """
        Return the subset of ``words`` that are known words.
        """
        return {w for w in words if w in self.words}

    def suggest(self, word):
        """
        Return suggestions for the (misspelled) ``word``.

        Parameters
        ----------
        word : str
            The word to find suggestions for.

        Returns
        -------
        suggestions : set of str
            The known words that are one edit away from ``word``; if there
            are none, the known words that are two edits away. The set is
            empty if neither exists.
        """
        # Both operands are sets, so the result is a (possibly empty) set
        return self.known(self.edits1(word)) or self.known_edits2(word)