File: conv.py

package info (click to toggle)
python-ldap3 2.9.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 3,236 kB
  • sloc: python: 30,487; makefile: 3
file content (272 lines) | stat: -rw-r--r-- 9,741 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
"""
"""

# Created on 2014.04.26
#
# Author: Giovanni Cannata
#
# Copyright 2014 - 2020 Giovanni Cannata
#
# This file is part of ldap3.
#
# ldap3 is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ldap3 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with ldap3 in the COPYING and COPYING.LESSER files.
# If not, see <http://www.gnu.org/licenses/>.

from base64 import b64encode, b64decode
import datetime
import re

from .. import SEQUENCE_TYPES, STRING_TYPES, NUMERIC_TYPES, get_config_parameter
from ..utils.ciDict import CaseInsensitiveDict
from ..core.exceptions import LDAPDefinitionError


def to_unicode(obj, encoding=None, from_server=False):
    """Convert bytes (and str in python2) to unicode text.

    Numeric values are first turned into their ``str`` form. Bytes and
    bytearrays are decoded with ``encoding`` or, when it is None, with the
    configured default encoding of the relevant side (server when
    ``from_server`` is true, client otherwise); on failure the configured
    additional encodings of that side are tried in order. Text that is
    already unicode is returned unmodified.

    Raises UnicodeError when no encoding can decode the value or when the
    value is of an unsupported type.
    """
    conf_default_client_encoding = get_config_parameter('DEFAULT_CLIENT_ENCODING')
    conf_default_server_encoding = get_config_parameter('DEFAULT_SERVER_ENCODING')
    conf_additional_server_encodings = get_config_parameter('ADDITIONAL_SERVER_ENCODINGS')
    conf_additional_client_encodings = get_config_parameter('ADDITIONAL_CLIENT_ENCODINGS')
    if isinstance(obj, NUMERIC_TYPES):
        obj = str(obj)

    if isinstance(obj, (bytes, bytearray)):
        if from_server:  # data from server
            primary = conf_default_server_encoding
            # AD could have DN not encoded in utf-8 (even if this is not allowed by RFC4510)
            fallbacks = conf_additional_server_encodings
            failure = "Unable to convert server data to unicode: %r"
        else:  # data from client
            primary = conf_default_client_encoding
            fallbacks = conf_additional_client_encodings
            failure = "Unable to convert client data to unicode: %r"

        if encoding is not None:  # an explicit encoding wins over the configured default
            primary = encoding

        try:
            return obj.decode(primary)
        except UnicodeDecodeError:
            for candidate in fallbacks:  # tries additional encodings
                try:
                    return obj.decode(candidate)
                except UnicodeDecodeError:
                    continue
            raise UnicodeError(failure % obj)

    if isinstance(obj, STRING_TYPES):  # python3 strings, python 2 unicode
        return obj

    raise UnicodeError("Unable to convert type %s to unicode: %r" % (obj.__class__.__name__, obj))


def to_raw(obj, encoding='utf-8'):
    """Tries to convert to raw bytes from unicode.

    :param obj: value to convert. Numeric values are converted through their
        ``str`` form; sequences are converted element by element.
    :param encoding: codec used to encode text values (default ``'utf-8'``).
    :return: ``bytes`` for text and numeric input, a list of converted
        elements for sequence input, and the object itself for ``bytes`` or
        for any type that is not handled here.
    """
    if isinstance(obj, NUMERIC_TYPES):
        obj = str(obj)

    if not (isinstance(obj, bytes)):
        if isinstance(obj, SEQUENCE_TYPES):
            # pass the requested encoding down, otherwise nested text would
            # silently be encoded with the default codec instead
            return [to_raw(element, encoding) for element in obj]
        elif isinstance(obj, STRING_TYPES):
            return obj.encode(encoding)
    return obj


def escape_filter_chars(text, encoding=None):
    """Escape the characters mentioned in RFC4515.

    The value is converted to unicode with ``encoding`` (the configured
    DEFAULT_ENCODING when None) and backslash, ``*``, ``(``, ``)`` and NUL
    are replaced by their backslash-hex form. If that fails for any reason
    the value is treated as raw bytes and every byte is escaped.
    """
    if encoding is None:
        encoding = get_config_parameter('DEFAULT_ENCODING')

    # the backslash must be handled first, otherwise the backslashes added by
    # the following replacements would be escaped again
    substitutions = (
        ('\\', '\\5c'),
        ('*', '\\2a'),
        ('(', '\\28'),
        (')', '\\29'),
        ('\x00', '\\00'),
    )

    try:
        text = to_unicode(text, encoding)
        escaped = text
        for special, replacement in substitutions:
            escaped = escaped.replace(special, replacement)
    except Exception:  # probably raw bytes values, return escaped bytes value
        escaped = to_unicode(escape_bytes(text))
    return escaped


def unescape_filter_chars(text, encoding=None):
    """Unescape the characters mentioned in RFC4515.

    :param text: escaped filter value; it is converted to raw bytes with
        ``to_raw`` before being processed.
    :param encoding: codec used for the conversion to bytes; the configured
        DEFAULT_ENCODING is used when None.
    :return: the raw bytes with the sequences ``\\5c``, ``\\2a``, ``\\28``,
        ``\\29`` and ``\\00`` replaced by the byte they stand for (the hex
        digits of ``5c`` and ``2a`` are matched case-insensitively).
    """
    if encoding is None:
        encoding = get_config_parameter('DEFAULT_ENCODING')

    replacements = {
        b'5c': b'\\',
        b'2a': b'*',
        b'28': b'(',
        b'29': b')',
        b'00': b'\x00',
    }

    raw = to_raw(text, encoding)
    # A single left-to-right pass is required: with chained replace() calls
    # the backslash produced by unescaping "\5c" would be read again as the
    # start of a new escape, so "\5c2a" would wrongly become "*".
    return re.sub(b'\\\\(5[cC]|2[aA]|28|29|00)',
                  lambda match: replacements[match.group(1).lower()],
                  raw)


def escape_bytes(bytes_value):
    """Return the LDAP escaped form (BACKSLASH HEX HEX for every byte) of a byte sequence.

    Text input is encoded as utf-8 before being escaped. An empty or
    otherwise false value gives an empty string.
    """
    if not bytes_value:
        return ''

    if str is not bytes:  # Python 3
        if isinstance(bytes_value, str):
            bytes_value = bytearray(bytes_value, encoding='utf-8')
        octets = (int(item) for item in bytes_value)
    else:  # Python 2
        if isinstance(bytes_value, unicode):
            bytes_value = bytes_value.encode('utf-8')
        octets = (ord(item) for item in bytes_value)

    # every byte becomes a backslash followed by two lowercase hex digits
    return ''.join('\\%02x' % octet for octet in octets)


def prepare_for_stream(value):
    """Return value unchanged on Python 3; on Python 2 return ``value.decode()``."""
    running_python2 = str is bytes
    return value.decode() if running_python2 else value


def json_encode_b64(obj):
    """Wrap the base64 encoding of obj in a dict with the keys 'encoding' and 'encoded'.

    Raises LDAPDefinitionError when obj cannot be base64 encoded.
    """
    try:
        payload = b64encode(obj)
    except Exception as e:
        raise LDAPDefinitionError('unable to encode ' + str(obj) + ' - ' + str(e))
    return {'encoding': 'base64', 'encoded': payload}


# noinspection PyProtectedMember
def check_json_dict(json_dict):
    """Walk json_dict in place, replacing leaf values with their format_json() form.

    Nested dicts and CaseInsensitiveDict values (through their ``_store``)
    are processed recursively; the elements of sequence values are handled
    the same way, one by one. Needed for python 2.
    """
    for key, value in json_dict.items():
        if isinstance(value, dict):
            check_json_dict(value)
            continue

        if isinstance(value, CaseInsensitiveDict):
            check_json_dict(value._store)
            continue

        if not isinstance(value, SEQUENCE_TYPES):
            json_dict[key] = format_json(value)
            continue

        for index, item in enumerate(value):
            if isinstance(item, dict):
                check_json_dict(item)
            elif isinstance(item, CaseInsensitiveDict):
                check_json_dict(item._store)
            else:
                value[index] = format_json(item)


def json_hook(obj):
    """Decode a mapping holding exactly the keys 'encoding' and 'encoded'.

    Such a mapping is replaced by the base64 decoding of its 'encoded'
    value; any other object is returned unchanged.
    """
    if not hasattr(obj, 'keys'):
        return obj

    names = list(obj.keys())
    if len(names) == 2 and 'encoding' in names and 'encoded' in names:
        return b64decode(obj['encoded'])

    return obj


# noinspection PyProtectedMember
def format_json(obj, iso_format=False):
    """Return a JSON serializable representation of obj.

    :param obj: the value to convert.
    :param iso_format: when true, timedelta values are rendered as an
        ISO 8601 duration instead of their ``str()`` form.
    :return: the ``_store`` of a CaseInsensitiveDict, ``str(obj)`` for
        datetime values, integers unchanged, text for bytes that are valid
        utf-8 (Python 3) or for anything ``unicode()`` accepts (Python 2),
        otherwise the base64 dict produced by ``json_encode_b64``.
    :raises LDAPDefinitionError: when the value cannot be serialized.
    """
    if isinstance(obj, CaseInsensitiveDict):
        return obj._store

    if isinstance(obj, datetime.datetime):
        return str(obj)

    if isinstance(obj, int):
        return obj

    if isinstance(obj, datetime.timedelta):
        if iso_format:
            # timedelta objects have no isoformat() method, so the duration
            # is rendered explicitly
            return _timedelta_to_iso8601(obj)
        return str(obj)

    if str is bytes:  # Python 2
        if isinstance(obj, long):  # long exists only in python2
            return obj

    try:
        if str is not bytes:  # Python 3
            if isinstance(obj, bytes):
                return str(obj, 'utf-8', errors='strict')
            # not caught by the except clause below: non-bytes values are rejected here
            raise LDAPDefinitionError('unable to serialize ' + str(obj))
        else:  # Python 2
            if isinstance(obj, unicode):
                return obj
            else:
                return unicode(obj)
    except (TypeError, UnicodeDecodeError):
        pass  # not valid text, fall back to base64 below

    try:
        return json_encode_b64(bytes(obj))
    except Exception:
        pass

    raise LDAPDefinitionError('unable to serialize ' + str(obj))


def _timedelta_to_iso8601(delta):
    """Render a timedelta as an ISO 8601 duration, e.g. 'P1DT2H3M4.5S' (leading '-' when negative)."""
    sign = ''
    if delta < datetime.timedelta(0):
        sign = '-'
        delta = -delta

    hours, remainder = divmod(delta.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    if delta.microseconds:
        # keep only the significant fractional digits
        seconds_text = ('%d.%06d' % (seconds, delta.microseconds)).rstrip('0')
    else:
        seconds_text = '%d' % seconds

    return '%sP%dDT%dH%dM%sS' % (sign, delta.days, hours, minutes, seconds_text)


def is_filter_escaped(text):
    """Tell whether text looks like an already escaped filter value.

    The answer is False when text contains one of ``(``, ``)``, ``*`` or NUL,
    or a backslash that is followed by a non-hex character in either of the
    next two positions. Raises ValueError when text is not exactly of the
    unicode string type.
    """
    expected_type = str if str is not bytes else unicode  # str for Python 3, unicode for Python 2
    if type(text) is not expected_type:
        raise ValueError('unicode input expected')

    if any(special in text for special in '()*\0'):
        return False

    return re.search('\\\\([^0-9a-fA-F]|(.[^0-9a-fA-F]))', text) is None


def ldap_escape_to_bytes(text):
    """Convert a value containing LDAP escapes (BACKSLASH HEX HEX) to raw bytes.

    Text and bytes/bytearray input are both scanned left to right: a
    backslash followed by two characters that parse as a hexadecimal number
    gives the corresponding byte, any other backslash is kept literally and
    every other character is copied (text characters through ``to_raw``).
    Input of any other type gives an empty result.

    Raises LDAPDefinitionError when anything goes wrong during the scan.
    """
    BACKSLASH = 92  # "\" ASCII code
    decoded = bytearray()
    position = 0
    try:
        if isinstance(text, STRING_TYPES):
            size = len(text)
            while position < size:
                char = text[position]
                if char != '\\':
                    decoded.extend(to_raw(char))
                    position += 1
                    continue

                if position + 2 < size:  # two more characters are available
                    try:
                        decoded.append(int(text[position + 1:position + 3], 16))
                    except ValueError:
                        pass  # not a valid escape, keep the backslash as is
                    else:
                        position += 3
                        continue

                decoded.append(BACKSLASH)
                position += 1
        elif isinstance(text, (bytes, bytearray)):
            size = len(text)
            while position < size:
                octet = text[position]
                if octet != BACKSLASH:
                    decoded.append(octet)
                    position += 1
                    continue

                if position + 2 < size:  # two more bytes are available
                    try:
                        decoded.append(int(text[position + 1:position + 3], 16))
                    except ValueError:
                        pass  # not a valid escape, keep the backslash as is
                    else:
                        position += 3
                        continue

                decoded.append(BACKSLASH)
                position += 1
    except Exception:
        raise LDAPDefinitionError('badly formatted LDAP byte escaped sequence')

    return bytes(decoded)