1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
|
#!/usr/local/bin/python
""" RegExp - tag a string using regexps (Version 0.1)
Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
Copyright (c) 2000-2014, eGenix.com Software GmbH; mailto:info@egenix.com
See the documentation for further information on copyrights,
or contact the author. All Rights Reserved.
"""
# engine + constants
from mx.TextTools import *
# special matching function
def match_regexp(text, position, len_text, regexpr):
    """Matching function for the tagging engine's CallArg command.

    Tries to match the compiled regular expression `regexpr` against
    `text`, anchored at index `position`.

    Parameters:
      text      -- the string being tagged
      position  -- index in text at which the match has to start
      len_text  -- supplied by the caller as third argument; not used here
      regexpr   -- compiled regular expression, i.e. an object providing
                   .match(string, pos) such as the result of re.compile()

    Returns the index in text directly following the matched substring,
    or `position` unchanged if the regexp does not match at that index.
    """
    match = regexpr.match(text, position)
    if match is None:
        # No match at this position: report "nothing consumed" rather
        # than raising AttributeError on None.
        return position
    # match.end() already is an absolute index into text, so it must not
    # be added to position a second time.
    return match.end()
# create a table in the sense re_1\|re_2\|...\|re_N where
# re_i \in regexps
def or_regexps(regexps):
    """Build a tag table that tries the given regexps as alternatives.

    The resulting table works in the sense of re_1|re_2|...|re_N.

    Parameters:
      regexps -- iterable of compiled regexps

    Returns a tuple of tag table entries. The entry for the i-th regexp
    uses the index i as tag object and has the engine call match_regexp
    with that regexp via CallArg. Following the mxTextTools tag table
    format, the jump offsets +1 / MatchOk move on to the next
    alternative if the regexp does not match and end the table
    successfully if it does. The closing (None, Fail, Here) entry makes
    the whole table fail once every alternative has been tried.
    """
    entries = [
        (index, CallArg, (match_regexp, regexp), +1, MatchOk)
        for index, regexp in enumerate(regexps)
    ]
    # Reached only if none of the alternatives matched.
    entries.append((None, Fail, Here))
    return tuple(entries)
if __name__ == '__main__':
# create some simple regexps
import re
regexps = [ 'spam*', 'ham*', 'eggs' ]
regexps = map(re.compile, regexps)
table = or_regexps(regexps)
text = raw_input('type some words: ')
result, taglist, nextindex = tag(text,table)
if result:
print 'subexpr nr.',taglist[0][0],'matched:',taglist[0]
else:
print 'no match'
if nextindex < len(text):
print 'rest of unparsed input:',text[nextindex:]
|