File: RegExp.py

package info (click to toggle)
egenix-mx-base 3.2.8-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 8,420 kB
  • ctags: 6,208
  • sloc: ansic: 22,304; python: 18,124; sh: 137; makefile: 121
file content (46 lines) | stat: -rw-r--r-- 1,354 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/local/bin/python

""" RegExp - tag a string using regexps (Version 0.1)
    
    Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    Copyright (c) 2000-2014, eGenix.com Software GmbH; mailto:info@egenix.com
    See the documentation for further information on copyrights,
    or contact the author. All Rights Reserved.
"""

# engine + constants
from mx.TextTools import *

# special matching function (used as callback together with CallArg)
def match_regexp(text,position,len_text,regexpr):

    """ Match the compiled regexp regexpr against text at position.

        Returns the index of the character right after the matched
        substring, or position unchanged if regexpr does not match
        at that position.

        len_text is part of the callback signature but is not
        consulted here.

        NOTE(review): the tagging engine is expected to treat an
        unchanged index as "no match" -- confirm against the
        mxTextTools Call/CallArg documentation.

    """
    match = regexpr.match(text,position)
    if match is None:
        # No match: signal "no progress" rather than failing with an
        # AttributeError on None.
        return position
    # match.end() is already an absolute index into text, so it must
    # not be added to position a second time.
    return match.end()

# create a table in the sense re_1 | re_2 | ... | re_N where
# re_i in regexps
def or_regexps(regexps):

    """ Build a tag table that tries the given regexps one by one.

        regexps may be any iterable of compiled regexps. The table
        gets one CallArg entry per regexp, using the regexp's index
        in regexps as first element of the entry and match_regexp as
        matching function, followed by a final Fail entry.

        NOTE(review): +1 and MatchOk are presumably the jump targets
        for "no match" and "match" (mxTextTools tag table entry
        layout) -- confirm against the mxTextTools documentation.

        Returns the table as tuple.

    """
    entries = [(index,CallArg,(match_regexp,regexp),+1,MatchOk)
               for index,regexp in enumerate(regexps)]
    # closing entry, placed after all the regexp entries
    entries.append((None,Fail,Here))
    return tuple(entries)

if __name__ == '__main__':

    # create some simple regexps
    import re
    regexps = [ 'spam*', 'ham*', 'eggs' ]
    regexps = map(re.compile, regexps)
    table = or_regexps(regexps)

    text = raw_input('type some words: ')
    result, taglist, nextindex = tag(text,table)

    if result:
        print 'subexpr nr.',taglist[0][0],'matched:',taglist[0]
    else:
        print 'no match'

    if nextindex < len(text): 
        print 'rest of unparsed input:',text[nextindex:]