File: text.py

package info (click to toggle)
python-xsdata 24.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,936 kB
  • sloc: python: 29,257; xml: 404; makefile: 27; sh: 6
file content (233 lines) | stat: -rw-r--r-- 5,250 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import re
import string
from typing import Any, List, Match, Tuple

# Words treated as reserved by this module: Python keywords, builtin and
# typing names, plus a few extra names such as "Meta", "QName" and "field".
# The empty string is included as well.
# NOTE(review): presumably these are names generated code must not use
# verbatim; the consumers live outside this file -- confirm against callers.
stop_words = {
    "",
    "Any",
    "Decimal",
    "Dict",
    "Enum",
    "False",
    "List",
    "Meta",
    "None",
    "Optional",
    "QName",
    "True",
    "Type",
    "Tuple",
    "Union",
    "and",
    "as",
    "assert",
    "async",
    "bool",
    "break",
    "class",
    "continue",
    "def",
    "del",
    "dict",
    "elif",
    "else",
    "except",
    "field",
    "Field",
    "finally",
    "float",
    "for",
    "from",
    "global",
    "if",
    "import",
    "in",
    "int",
    "is",
    "lambda",
    "list",
    "nonlocal",
    "not",
    "object",  # py36 specific
    "or",
    "pass",
    "raise",
    "return",
    "self",
    "str",
    "try",
    "type",
    "while",
    "with",
    "yield",
}

# Bound membership test: is_reserved(name) is True when name is in stop_words.
is_reserved = stop_words.__contains__


def prefix(value: str, sep: str = ":") -> str:
    """
    Return the part of the string that precedes the separator.

    The value is the first item of the pair produced by :func:`split`, which
    is None when :func:`split` finds no prefix.
    """
    head, _ = split(value, sep)
    return head


def suffix(value: str, sep: str = ":") -> str:
    """
    Return the part of the string that follows the separator.

    The value is the second item of the pair produced by :func:`split`.
    """
    _, tail = split(value, sep)
    return tail


def split(value: str, sep: str = ":") -> Tuple:
    """
    Cut the string at the first occurrence of the separator and return a
    ``(prefix, suffix)`` tuple.

    When the separator is missing, or nothing follows its first occurrence,
    the prefix is None and the leading part is returned as the suffix.
    """
    head, _, tail = value.partition(sep)
    if not tail:
        return None, head

    return head, tail


def capitalize(value: str, **kwargs: Any) -> str:
    """
    Upper case the first character of the string and keep the rest as is.

    An empty string is returned unchanged. Extra keyword arguments are
    accepted and ignored.
    """
    # Slicing instead of indexing keeps an empty input from raising IndexError.
    return value[:1].upper() + value[1:]


def original_case(value: str, **kwargs: Any) -> str:
    """
    Hand the given string back untouched.

    Extra keyword arguments are accepted and ignored.
    """
    return value


def pascal_case(value: str, **kwargs: Any) -> str:
    """
    Convert the given string to pascal case.

    Every word found by :func:`split_words` is title cased and the words are
    joined without a separator.
    """
    return "".join(word.title() for word in split_words(value))


def camel_case(value: str, **kwargs: Any) -> str:
    """
    Convert the given string to camel case.

    Every word found by :func:`split_words` is title cased, the words are
    joined without a separator and the first character is lower cased. An
    empty string is returned when no words are found.
    """
    result = "".join(map(str.title, split_words(value)))
    # Slicing instead of indexing keeps an empty result from raising
    # IndexError, so this behaves like the other case converters.
    return result[:1].lower() + result[1:]


def mixed_case(value: str, **kwargs: Any) -> str:
    """
    Convert the given string to mixed case.

    The words found by :func:`split_words` are joined without a separator
    and keep their original casing.
    """
    words = split_words(value)
    return "".join(words)


def mixed_pascal_case(value: str, **kwargs: Any) -> str:
    """
    Convert the given string to mixed pascal case.

    This is the :func:`mixed_case` result passed through :func:`capitalize`.
    """
    mixed = mixed_case(value)
    return capitalize(mixed)


def mixed_snake_case(value: str, **kwargs: Any) -> str:
    """
    Convert the given string to mixed snake case.

    The words found by :func:`split_words` are joined with underscores and
    keep their original casing.
    """
    words = split_words(value)
    return "_".join(words)


def snake_case(value: str, **kwargs: Any) -> str:
    """
    Convert the given string to snake case.

    The words found by :func:`split_words` are lower cased and joined with
    underscores.
    """
    return "_".join(word.lower() for word in split_words(value))


def screaming_snake_case(value: str, **kwargs: Any) -> str:
    """
    Convert the given string to screaming snake case.

    This is the upper cased :func:`snake_case` result; keyword arguments are
    forwarded to :func:`snake_case`.
    """
    lowered = snake_case(value, **kwargs)
    return lowered.upper()


def kebab_case(value: str, **kwargs: Any) -> str:
    """
    Convert the given string to kebab case.

    The words found by :func:`split_words` are joined with hyphens and keep
    their original casing.
    """
    words = split_words(value)
    return "-".join(words)


def split_words(value: str) -> List[str]:
    """
    Break a string into words made of ascii letters and digits.

    Any other character acts as a separator and is dropped. A new word also
    starts at an upper case letter that follows a lower case letter or a
    digit, e.g. ``fooBar_baz2`` gives ``["foo", "Bar", "baz2"]`` while
    ``HTTPServer`` stays a single word.
    """
    words: List[str] = []
    current: List[str] = []
    last_kind = None

    def close_word() -> None:
        """Move the collected characters, if any, into the word list."""
        if current:
            words.append("".join(current))
            current.clear()

    for char in value:
        kind = classify(char)
        if kind == StringType.OTHER:
            close_word()
        else:
            # Closing is a no-op at the start of the string or right after a
            # separator, because nothing has been collected yet.
            if kind == StringType.UPPER and last_kind != StringType.UPPER:
                close_word()
            current.append(char)

        last_kind = kind

    close_word()
    return words


class StringType:
    """Character categories returned by :func:`classify`."""

    UPPER = 1  # ascii upper case letter, A-Z
    LOWER = 2  # ascii lower case letter, a-z
    NUMERIC = 3  # ascii digit, 0-9
    OTHER = 4  # any other character


def classify(character: str) -> int:
    """
    Return the :class:`StringType` category of a single character.

    Only ascii letters and digits are recognised; every other character,
    accented letters included, is reported as ``StringType.OTHER``.
    """
    code = ord(character)

    if 65 <= code <= 90:  # "A" .. "Z"
        return StringType.UPPER

    if 97 <= code <= 122:  # "a" .. "z"
        return StringType.LOWER

    if 48 <= code <= 57:  # "0" .. "9"
        return StringType.NUMERIC

    return StringType.OTHER


# Matches every character escape_string rewrites: the control characters
# U+0000-U+001F, the backslash and the double quote. The explicitly listed
# \b \f \n \r \t already fall inside the \x00-\x1f range.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Replacement text for each character ESCAPE can match; the literal entries
# are the short, readable escapes.
ESCAPE_DCT = {
    "\\": "\\\\",
    '"': '\\"',
    "\b": "\\b",
    "\f": "\\f",
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
}
# Give every remaining control character a generic \uXXXX escape; setdefault
# leaves the short escapes defined above untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), f"\\u{i:04x}")


def escape_string(value: str) -> str:
    """
    Escape a string for code generation.

    Every character matched by ``ESCAPE`` is swapped for its entry in
    ``ESCAPE_DCT``; all other characters are left as they are.

    Source: json.encoder.py_encode_basestring
    """
    return ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], value)


# The ascii letters and digits, the only characters alnum keeps.
__alnum_ascii__ = set(string.ascii_letters + string.digits)


def alnum(value: str) -> str:
    """
    Drop every character that is not an ascii letter or digit and lower case
    what is left.
    """
    kept = [char for char in value if char in __alnum_ascii__]
    return "".join(kept).lower()


def variable(value: str) -> str:
    """
    Reduce the string to characters usable in a Python variable name.

    Every non-word character is removed first, then everything before the
    first ascii letter is dropped. The result is empty when no ascii letter
    is left.
    """
    # Keep only word characters, i.e. alphanumerics and underscores
    word_chars = re.sub(r"\W", "", value)
    # A name can't begin with a digit; leading underscores are dropped too
    return re.sub(r"^[^a-zA-Z]+", "", word_chars)