1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
|
"""
ldap.schema.tokenizer - Low-level parsing functions for schema element strings
See https://www.python-ldap.org/ for details.
"""
import re
# Alternatives of the tokenizer pattern, tried from first to last at every
# position. Each alternative is exactly one capture group, so findall()
# yields 5-tuples in which only the slot of the matching alternative is
# non-empty.
_TOKEN_ALTERNATIVES = (
    r"(\()",             # 1: opening parenthesis
    r"(\))",             # 2: closing parenthesis
    r"([^'$()\s]+)",     # 3: run (length >= 1) free of ' $ ( ) and whitespace
    r"('.*?'(?!\w))",    # 4: single-quoted string (may be empty); the right
                         #    quote must not be followed by a word character
    r"([^\s]+?)",        # 5: residue, any other non-whitespace character
)

TOKENS_FINDALL = re.compile("|".join(_TOKEN_ALTERNATIVES)).findall
def split_tokens(s):
    """
    Returns list of syntax elements with quotes and spaces stripped.

    Parentheses are returned as separate "(" and ")" items, quoted strings
    are returned without their surrounding single quotes and '$' separators
    are dropped from the result.

    Raises ValueError if a '$' is found outside of parentheses, if the
    parentheses are not balanced (this includes a ")" which has no
    preceding "(") or if any other residue cannot be tokenized.
    """
    parts = []
    parens = 0  # current nesting depth of parentheses
    for opar, cpar, unquoted, quoted, residue in TOKENS_FINDALL(s):
        if unquoted:
            parts.append(unquoted)
        elif quoted:
            # Strip the enclosing single quotes
            parts.append(quoted[1:-1])
        elif opar:
            parens += 1
            parts.append(opar)
        elif cpar:
            parens -= 1
            if parens < 0:
                # A closing parenthesis without an opening one must not be
                # compensated by a later "(", e.g. in ") (".
                raise ValueError("Unbalanced parenthesis in %r" % (s,))
            parts.append(cpar)
        elif residue == '$':
            # '$' is only a list separator; it is validated but not returned
            if not parens:
                raise ValueError("'$' outside parenthesis in %r" % (s,))
        else:
            raise ValueError(residue, s)
    if parens:
        raise ValueError("Unbalanced parenthesis in %r" % (s,))
    return parts
def extract_tokens(l,known_tokens):
    """
    Returns dictionary of known tokens with all values

    l is the token list of one schema element; it has to start with "("
    and end with ")". known_tokens maps each recognized keyword to its
    default value; it is copied and never modified.

    In the returned dictionary every keyword found in l is mapped to a
    tuple: () if the keyword has no value, a 1-tuple for a single value
    and, for a parenthesized list, a tuple of all items up to the next ")"
    with '$' items removed. Keywords not present in l keep their default
    value. Items of l which are not known keywords are skipped.
    """
    # NOTE: deliberately left as an assert so that callers keep seeing
    # AssertionError (carrying a ValueError as its argument) for malformed
    # input. Be aware that this check is skipped when running with -O.
    assert l[0].strip()=="(" and l[-1].strip()==")",ValueError(l)
    result = dict(known_tokens)
    i = 0
    l_len = len(l)
    while i < l_len:
        token = l[i]
        i += 1  # Consume keyword or unrecognized item
        if token not in result or i >= l_len:
            continue
        following = l[i]
        if following in result or following == ")":
            # non-valued: directly followed by the next keyword or by a
            # closing parenthesis, which must not be taken as the value.
            # The following item is left for the next iteration.
            result[token] = ()
        elif following == "(":
            # multi-valued
            i += 1  # Consume left parenthesis
            start = i
            while i < l_len and l[i] != ")":
                i += 1
            result[token] = tuple(v for v in l[start:i] if v != '$')
            i += 1  # Consume right parenthesis
        else:
            # single-valued
            result[token] = (following,)
            i += 1  # Consume single value
    return result
|