File: RTF.py

package info (click to toggle)
egenix-mx-base 3.2.8-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 8,420 kB
  • ctags: 6,208
  • sloc: ansic: 22,304; python: 18,124; sh: 137; makefile: 121
file content (95 lines) | stat: -rw-r--r-- 2,285 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/local/bin/python

""" RTF - tag a RTF string (Version 0.2)
    
    This version does recursion using the TableInList cmd.
    
    Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    Copyright (c) 2000-2014, eGenix.com Software GmbH; mailto:info@egenix.com
    See the documentation for further information on copyrights,
    or contact the author. All Rights Reserved.
"""

import sys

# engine + constants
from mx.TextTools import *

# list of tables (hack to be able to do recursion)
tables = []
# indices
rtf_index = 0

numeral = (# sign ?
           (None,Is,'-',+1),
           (None,AllIn,number)
          )

# XXX: doesn't know how to handle \bin et al. with embedded {}

ctrlword = (# name
            ('name',AllIn,a2z),
            # delimiter
            (None,Is,' ',+1,MatchOk),
            (None,IsIn,number+'-',MatchOk),
             (None,Skip,-1),
             ('param',Table,numeral,+0,MatchOk),
             (None,Is,' ',+1,MatchOk),
              (None,Skip,-1)
           )

hex = set(number+'abcdefABCDEF')
notalpha = set(alpha,0)

ctrlsymbol = (# hexquote
              (None,Is,"'",+3),
               (None,IsInSet,hex),
               (None,IsInSet,hex,MatchFail,MatchOk),
              # other
              (None,IsInSet,notalpha,+1,MatchOk)
             )

rtf = (# control ?
       (None,Is,'\\',+3),
        # word
        ('word',Table,ctrlword,+1,-1),
        # symbol
        ('symbol',Table,ctrlsymbol,+1,-2),
       # closing group
       (None,Is,'}',+2),
        (None,Skip,-1,0,MatchOk),
       # nested group
       (None,Is,'{',+4),
        # recurse
        ('group',TableInList,(tables,rtf_index)),
        (None,Is,'}'),
        (None,Jump,To,-8),
       # document text
       ('text', AllNotIn, '\\{}',+1,-9),
       # EOF
       ('eof',EOF,Here)
      )

# add tables to list
tables.append(rtf)

# note:
# TableInList,(tables,rtf_index)) means: use table tables[rtf_index]

if __name__ == '__main__':

    t = TextTools._timer()

    # read file
    f = open(sys.argv[1])
    text = f.read()

    # tag text
    t.start()
    result, taglist, nextindex = tag(text,rtf)
    t = t.stop()[0]

    print result, nextindex, t,'sec ... hit return to see the tags'
    raw_input()
    print
    print_tags(text,taglist)