File: findlineset.py

package info (click to toggle)
simpleparse 2.1.0a1-6
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, wheezy
  • size: 2,776 kB
  • ctags: 4,332
  • sloc: python: 7,036; ansic: 6,395; makefile: 22
file content (60 lines) | stat: -rwxr-xr-x 1,550 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Grammar source for the demo, written in SimpleParse's EBNF dialect and
# passed to Parser() further down.  It is a raw string so that the `\t`
# inside the character class reaches the grammar compiler unprocessed.
#
# Productions:
#   firstLine / secondLine / fifthLine -- each matches one fixed marker string.
#   ts   -- optional spaces/tabs; declared as an unreported (<...>) production
#           but not referenced by any other rule in this grammar.
#   set  -- for each marker in turn (first, second, fifth): a negated,
#           repeated match of the marker followed by the marker itself.
#           Presumably `-x*` consumes input up to the next place `x` matches;
#           confirm against the SimpleParse grammar documentation.
#   sets -- zero or more consecutive `set` matches.
#
# NOTE(review): the comment embedded in the grammar says the literal strings
# are inlined in the negative matches, but `set` as written uses the
# firstLine/secondLine/fifthLine name references.  The embedded comment looks
# stale; it is left untouched because it is part of the runtime string.
declaration = r"""

firstLine := "This is first line"
secondLine := "This is second line"
fifthLine := "This is fifth line"

<ts>  := [ \t]*

# the actual text strings are included directly
# for the negative versions which is basically to
# avoid the overhead of the name-ref indirection
# (which should be optimised away automatically, but isn't)
set := -firstLine*, firstLine, -secondLine*, secondLine, -fifthLine*, fifthLine
sets := set*
"""

from simpleparse.parser import Parser

# Build the parser once at module load from the grammar text above.
# 'set' is presumably the production used when parse() is called without an
# explicit production name (as the file1/file2 demo calls do) -- confirm
# against the simpleparse.parser.Parser documentation.
p = Parser(declaration, 'set')

# Sample input: the three marker lines occur in first/second/fifth order,
# interleaved with "NOT" look-alike lines.
file1 = (
    "This is first line\n"
    "This is second line\n"
    "This is NOT first line\n"
    "This is NOT second line\n"
    "This is fifth line\n"
    "This is NOT fifth line\n"
)
# Sample input: same six lines, but the fifth-line marker comes before the
# second-line marker.
file2 = (
    "This is first line\n"
    "This is fifth line\n"
    "This is second line\n"
    "This is NOT first line\n"
    "This is NOT second line\n"
    "This is NOT fifth line\n"
)
if __name__ == "__main__":
    # Demo / benchmark driver.  First shows the raw parse results for the two
    # small samples, then times the "sets" production on two ~30 MB inputs:
    # one with a single match at the very end, one made of back-to-back matches.
    #
    # Written to run unchanged on Python 2 and Python 3: every print() call
    # takes exactly one pre-formatted string, so the output text is the same
    # under either interpreter.
    import pprint
    import time

    # time.clock() was removed in Python 3.8; use perf_counter where it exists
    # and fall back to clock() only on interpreters that lack it (Python 2).
    timer = getattr(time, 'perf_counter', None) or time.clock

    def _timed_sets_parse(banner, data):
        """Parse *data* with the "sets" production; print timing and match count.

        Returns the list of results reported by the parser.
        """
        print(banner)
        started = timer()
        # parse() is unpacked as a 3-tuple; only the result list is used.  The
        # third item is no longer bound to `next`, which shadowed the builtin.
        _success, matches, _next_pos = p.parse(data, "sets")
        print('finished parse %s' % (timer() - started,))
        print('number of results %s' % (len(matches),))
        return matches

    pprint.pprint(p.parse(file1))
    pprint.pprint(p.parse(file2))

    results = _timed_sets_parse(
        'starting parse of file 1 with 1 match at end',
        "\n" * 30000000 + file1,
    )
    pprint.pprint(results)
    print('')

    # Floor division keeps the repeat count an int on Python 3, where `/`
    # would produce a float and `str * float` raises TypeError.
    _timed_sets_parse(
        'starting parse of file 1 with ~230,000 matches (slow)',
        file1 * (30000000 // len(file1)),
    )