File: stringprep.py

package info (click to toggle)
python-aioxmpp 0.12.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,152 kB
  • sloc: python: 96,969; xml: 215; makefile: 155; sh: 72
file content (268 lines) | stat: -rw-r--r-- 7,453 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
########################################################################
# File name: stringprep.py
# This file is part of: aioxmpp
#
# LICENSE
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/>.
#
########################################################################
"""
Stringprep support
##################

This module implements the Nodeprep (`RFC 6122`_), Resourceprep
(`RFC 6122`_) and Nameprep (`RFC 3491`_) stringprep profiles.

.. autofunction:: nodeprep

.. autofunction:: resourceprep

.. autofunction:: nameprep

.. _RFC 3454: https://tools.ietf.org/html/rfc3454
.. _RFC 3491: https://tools.ietf.org/html/rfc3491
.. _RFC 6122: https://tools.ietf.org/html/rfc6122

"""

import stringprep

from unicodedata import ucd_3_2_0 as unicodedata

# Characters which nodeprep() rejects in its prohibited-output check, on top
# of the stringprep prohibition tables it also consults.
_nodeprep_prohibited = frozenset("\"&'/:<>@")


def is_RandALCat(c):
    """
    Return :data:`True` if the bidirectional class of the character `c`
    (according to the Unicode 3.2.0 database) is ``R`` or ``AL``.
    """
    bidi_class = unicodedata.bidirectional(c)
    return bidi_class == "R" or bidi_class == "AL"


def is_LCat(c):
    """
    Return :data:`True` if the bidirectional class of the character `c`
    (according to the Unicode 3.2.0 database) is ``L``.
    """
    bidi_class = unicodedata.bidirectional(c)
    return bidi_class == "L"


def check_against_tables(chars, tables):
    """
    Test every character of `chars` against the table predicates in
    `tables`.

    `tables` must be a reusable iterable (it is traversed once per character)
    of characteristic functions of character sets, that is, callables which
    return a true value if the character passed to them is in the table.

    Return the first character occurring in any of the tables, or
    :data:`None` if no character matches.
    """
    for candidate in chars:
        for in_table in tables:
            if in_table(candidate):
                return candidate

    return None


def do_normalization(chars):
    """
    Apply NFKC normalization (using the Unicode 3.2.0 database) to the list
    of unicode characters `chars`.

    The list is modified in-place: its contents are replaced by the
    characters of the normalized string.
    """
    joined = "".join(chars)
    # Slice assignment from a string stores one list item per character.
    chars[:] = unicodedata.normalize("NFKC", joined)


def check_bidi(chars):
    """
    Verify the stringprep bidirectionality constraints on the list of unicode
    characters `chars`.

    A string without any R/AL character (including the empty string) always
    passes. Otherwise, :class:`ValueError` is raised if the string also
    contains an L character or if its first or last character is not an R/AL
    character.
    """

    # Nothing to check for the empty string; this also keeps the first/last
    # character accesses below safe.
    if not chars:
        return

    if not any(map(is_RandALCat, chars)):
        return

    if any(map(is_LCat, chars)):
        raise ValueError("L and R/AL characters must not occur in the same"
                         " string")

    first, last = chars[0], chars[-1]
    if not (is_RandALCat(first) and is_RandALCat(last)):
        raise ValueError("R/AL string must start and end with R/AL character.")


def check_prohibited_output(chars, bad_tables):
    """
    Ensure that none of the characters in `chars` is prohibited output.

    `bad_tables` is an iterable of table predicates as accepted by
    :func:`check_against_tables`. If any character of `chars` is in one of
    the tables, :class:`ValueError` is raised, naming the code point of the
    first offending character. `chars` is not modified.
    """
    offending = check_against_tables(chars, bad_tables)
    if offending is None:
        return

    message = "Input contains invalid unicode codepoint: U+{:04x}"
    raise ValueError(message.format(ord(offending)))


def check_unassigned(chars, bad_tables):
    """
    Check that `chars` does not contain any unassigned code points as per
    the given list of `bad_tables`.

    `bad_tables` is an iterable of table predicates as accepted by
    :func:`check_against_tables`. If it is :data:`None`, only the stringprep
    table A.1 (:func:`stringprep.in_table_a1`) is consulted.

    Operates on a list of unicode code points provided in `chars`; the list
    is not modified. If a character from `chars` is in any of the tables,
    :class:`ValueError` is raised, naming the first offending code point.
    """
    # Honour the tables supplied by the caller. Previously the argument was
    # unconditionally replaced by table A.1, so it was silently ignored.
    if bad_tables is None:
        bad_tables = (stringprep.in_table_a1,)

    violator = check_against_tables(chars, bad_tables)
    if violator is not None:
        raise ValueError("Input contains unassigned code point: "
                         "U+{:04x}".format(ord(violator)))


def _nodeprep_do_mapping(chars):
    """
    Apply the mapping step shared by nodeprep and nameprep to the list of
    unicode characters `chars`, in-place.

    Characters in stringprep table B.1 are dropped; every other character is
    replaced by the result of the table B.2 mapping, which may consist of
    several characters.
    """
    mapped = []
    for char in chars:
        if stringprep.in_table_b1(char):
            continue
        # Extending with the mapping result appends one item per character.
        mapped.extend(stringprep.map_table_b2(char))
    chars[:] = mapped


def nodeprep(string, allow_unassigned=False):
    """
    Apply the Nodeprep (`RFC 6122`_) profile to `string` and return the
    prepared string.

    The input is mapped, normalized, checked for prohibited output
    (including the extra characters prohibited by nodeprep) and checked for
    bidirectionality. Unless `allow_unassigned` is true, the result is also
    checked against the table of unassigned code points.

    In the error cases defined in `RFC 3454`_ (stringprep), a
    :class:`ValueError` is raised.
    """
    prohibited_tables = (
        stringprep.in_table_c11,
        stringprep.in_table_c12,
        stringprep.in_table_c21,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
        lambda char: char in _nodeprep_prohibited,
    )

    codepoints = list(string)
    _nodeprep_do_mapping(codepoints)
    do_normalization(codepoints)
    check_prohibited_output(codepoints, prohibited_tables)
    check_bidi(codepoints)

    if not allow_unassigned:
        check_unassigned(codepoints, (stringprep.in_table_a1,))

    return "".join(codepoints)


def _resourceprep_do_mapping(chars):
    """
    Apply the mapping step of resourceprep to the list of unicode characters
    `chars`, in-place: characters in stringprep table B.1 are dropped and all
    other characters are kept as they are.
    """
    chars[:] = [
        char for char in chars
        if not stringprep.in_table_b1(char)
    ]


def resourceprep(string, allow_unassigned=False):
    """
    Apply the Resourceprep (`RFC 6122`_) profile to `string` and return the
    prepared string.

    The input is mapped, normalized, checked for prohibited output and
    checked for bidirectionality. Unless `allow_unassigned` is true, the
    result is also checked against the table of unassigned code points.

    In the error cases defined in `RFC 3454`_ (stringprep), a
    :class:`ValueError` is raised.
    """
    prohibited_tables = (
        stringprep.in_table_c12,
        stringprep.in_table_c21,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )

    codepoints = list(string)
    _resourceprep_do_mapping(codepoints)
    do_normalization(codepoints)
    check_prohibited_output(codepoints, prohibited_tables)
    check_bidi(codepoints)

    if not allow_unassigned:
        check_unassigned(codepoints, (stringprep.in_table_a1,))

    return "".join(codepoints)


def nameprep(string, allow_unassigned=False):
    """
    Apply the Nameprep
    (`RFC 3491 <https://tools.ietf.org/html/rfc3491>`__) profile to `string`
    and return the prepared string.

    The input is mapped (using the same mapping as nodeprep), normalized,
    checked for prohibited output and checked for bidirectionality. Unless
    `allow_unassigned` is true, the result is also checked against the table
    of unassigned code points.

    In the error cases defined in `RFC 3454`_ (stringprep), a
    :class:`ValueError` is raised.
    """
    prohibited_tables = (
        stringprep.in_table_c12,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )

    codepoints = list(string)
    _nodeprep_do_mapping(codepoints)
    do_normalization(codepoints)
    check_prohibited_output(codepoints, prohibited_tables)
    check_bidi(codepoints)

    if not allow_unassigned:
        check_unassigned(codepoints, (stringprep.in_table_a1,))

    return "".join(codepoints)