File: utils.py

package info (click to toggle)
blends 0.7.11
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,260 kB
  • sloc: xml: 4,904; python: 1,226; sh: 705; makefile: 290
file content (43 lines) | stat: -rw-r--r-- 1,374 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import re

def normalizedText(t, n=68, escape = False):
    """
    Given a text with newlines and multiple spaces, reduces every run
    of whitespace to a single space, and splits the result into
    pieces that are not too long.

    Parameters:
    -----------

    - t (str) a text to normalize
    - n (int) the maximum length of a piece, when possible. Defaults to 68.
    - escape (bool): if True, double quotes will be escaped. Defaults to False

    Returns:
    --------

    A list of strings, one per output line; an empty string ("") when
    `t` is empty or None. The text is only ever cut right after a
    space, and that space stays at the end of the piece it closes, so
    concatenating the pieces gives back the normalized text. A piece is
    longer than `n` only when it holds a single word that does not fit.
    """
    if not t:
        return ""
    # \s already covers newlines and tabs
    onestring = re.sub(r"\s+", " ", t)
    if escape:
        # escape double quotes
        onestring = onestring.replace('"', '\\"')

    starts = [0]      # index at which each output piece begins
    last_space = 0    # index of the most recent space seen so far
    for pos, char in enumerate(onestring):
        if char != " ":
            continue
        # The current word made the piece too long: cut after the
        # previous space, provided that space lies inside the piece.
        if pos - starts[-1] >= n and last_space > starts[-1]:
            starts.append(last_space + 1)
        # Still too long: the piece is one oversized word, so the only
        # possible cut is right after the current space.
        if pos - starts[-1] >= n:
            starts.append(pos + 1)
        last_space = pos
    # The end of the text is a break opportunity too; without this check
    # the last word could push the final piece beyond n.
    if len(onestring) - starts[-1] > n and last_space > starts[-1]:
        starts.append(last_space + 1)

    ends = starts[1:] + [len(onestring)]
    # a cut right after a trailing space would leave an empty last piece
    return [onestring[a:b] for a, b in zip(starts, ends) if a < b]