# ______________________________________________________________________
"""Module pytokenize
This is a modified version of Ka-Ping Yee's tokenize module found in the
Python standard library.
The primary modification is the removal of the tokenizer's dependence on the
standard Python regular expression module, which is written in C. The regular
expressions have been replaced with hand built DFA's using the
basil.util.automata module.
"""
# ______________________________________________________________________
from pypy.interpreter.pyparser import automata
from pypy.interpreter.pyparser.dfa_generated import *
__all__ = [ "tokenize" ]
endDFAs = {"'" : singleDFA,
           '"' : doubleDFA,
           'r' : None,
           'R' : None,
           "u" : None,
           "U" : None,
           'f' : None,
           'F' : None,
           'b' : None,
           'B' : None}
for uniPrefix in ("", "b", "B", "f", "F"):
    for rawPrefix in ("", "r", "R"):
        prefix_1 = uniPrefix + rawPrefix
        prefix_2 = rawPrefix + uniPrefix
        endDFAs[prefix_1 + "'''"] = single3DFA
        endDFAs[prefix_1 + '"""'] = double3DFA
        endDFAs[prefix_2 + "'''"] = single3DFA
        endDFAs[prefix_2 + '"""'] = double3DFA

for uniPrefix in ("u", "U"):
    endDFAs[uniPrefix + "'''"] = single3DFA
    endDFAs[uniPrefix + '"""'] = double3DFA
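
# A quick illustration of what the loops above produce (a sketch, not an
# exhaustive listing): every legal combination of string prefix and opening
# quote now maps to the DFA that recognizes the rest of that string, e.g.
#
#     endDFAs["'"]      is singleDFA    # plain single-quoted string
#     endDFAs['rb"""']  is double3DFA   # raw bytes, triple double-quoted
#
# The bare one-character entries ('r', 'b', ...) map to None; they only mark
# characters that can begin a string prefix.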
# A one-state DFA that accepts any run of spaces, tabs, and form feeds:
# state 0 is accepting and loops back to itself on each whitespace character.
whiteSpaceStatesAccepts = [True]
whiteSpaceStates = [{'\t': 0, ' ': 0, '\x0c': 0}]
whiteSpaceDFA = automata.DFA(whiteSpaceStates, whiteSpaceStatesAccepts)
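
# A minimal sketch of how a table in this form can be driven by hand,
# assuming the representation used above: one dict per state mapping an
# input character to the next state's index, plus a parallel list of
# accepting flags.  The name _walk_dfa is hypothetical and for illustration
# only; the real matching is done inside the automata module.
def _walk_dfa(states, accepts, text, pos=0):
    state = 0
    i = pos
    last_accept = pos if accepts[state] else -1
    while i < len(text):
        next_state = states[state].get(text[i], -1)
        if next_state == -1:
            break                      # no transition on this character
        state = next_state
        i += 1
        if accepts[state]:
            last_accept = i            # remember the longest match so far
    return last_accept                 # one past the match, or -1 if none

# For example, _walk_dfa(whiteSpaceStates, whiteSpaceStatesAccepts, "  \tx")
# returns 3: the length of the whitespace run before 'x'.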
# ______________________________________________________________________
# COPIED:
triple_quoted = {}
for t in ("'''", '"""',
          "r'''", 'r"""', "R'''", 'R"""',
          "u'''", 'u"""', "U'''", 'U"""',
          "f'''", 'f"""', "F'''", 'F"""',
          "fr'''", 'fr"""', "Fr'''", 'Fr"""',
          "fR'''", 'fR"""', "FR'''", 'FR"""',
          "rf'''", 'rf"""', "rF'''", 'rF"""',
          "Rf'''", 'Rf"""', "RF'''", 'RF"""',
          "b'''", 'b"""', "B'''", 'B"""',
          "br'''", 'br"""', "Br'''", 'Br"""',
          "bR'''", 'bR"""', "BR'''", 'BR"""',
          "rb'''", 'rb"""', "rB'''", 'rB"""',
          "Rb'''", 'Rb"""', "RB'''", 'RB"""'):
    triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "u'", 'u"', "U'", 'U"',
          "f'", 'f"', "F'", 'F"',
          "fr'", 'fr"', "Fr'", 'Fr"',
          "fR'", 'fR"', "FR'", 'FR"',
          "rf'", 'rf"', "rF'", 'rF"',
          "Rf'", 'Rf"', "RF'", 'RF"',
          "b'", 'b"', "B'", 'B"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"',
          "rb'", 'rb"', "rB'", 'rB"',
          "Rb'", 'Rb"', "RB'", 'RB"'):
    single_quoted[t] = t
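
# Sketch of how a tokenizer loop can consult these tables (illustrative
# only; token is a hypothetical stand-in for the tokenizer's own state):
#
#     if token in triple_quoted:
#         endDFA = endDFAs[token]    # scan ahead for the closing ''' or """
#     elif token in single_quoted:
#         ...                        # single-line string, or a continuation
#
# Both dicts map each legal opening (prefix plus quote) to itself, so they
# serve purely as constant-time membership tests.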
tabsize = 8      # column width of a tab when computing indentation
alttabsize = 1   # alternate tab width, used to detect ambiguous tab/space mixes