File: text_to_markup.py

package info (click to toggle)
evolvotron 0.6.1-1%2Bwheezy1
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 1,144 kB
  • sloc: cpp: 9,512; python: 163; sh: 157; makefile: 48
file content (205 lines) | stat: -rwxr-xr-x 6,643 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/usr/bin/env python

# Convert Tim-style text to html or qml
#
# The Rules:
# - Lines with all upper case words to h2 or h3 capwords depending on next line underlining (first to h1/title though)
#   (must be 3 chars or more) 
#   (todo: relax to not all upper case... no need to capwords if not)
# - Other text to p, blank lines break a p
# - Lines beginning with "- " (NB space) to ul/li (bulleted)
# - Lines beginning with "-?" (no space) to ul/li (?) with <br> at end of first line
# - Lines ending with ":" get a <br> appended
# - Words delimited as <xxx> become <i>xxx</i>
# - Lines beginning with "$ " are a single line of code: wrapped in <code>, with a <br> appended

import sys
import string
import re

def line_of_dashes(n):
    """Return a string of n "-" characters (empty when n is zero or negative)."""
    # String repetition works on both Python 2 and 3 and avoids the
    # character-by-character concatenation loop.
    return "-"*n

def line_of_equals(n):
    """Return a string of n "=" characters (empty when n is zero or negative)."""
    # String repetition works on both Python 2 and 3 and avoids the
    # character-by-character concatenation loop.
    return "="*n

class TextToMarkup:
    """Convert "Tim-style" plain text into HTML or QML markup.

    Input is read line by line with one line of lookahead, which is what
    lets a heading be recognised by the underline on the following line.
    Every output line goes through dispose(), which can optionally wrap it
    as a double-quoted string literal.
    """

    def __init__(self,m,s):
        """Create a converter.

        m -- output mode: "html" or "qml"; any other value (e.g. None)
             produces the body markup with no document wrapper.
        s -- true to emit each output line as a quoted string literal
             (see dispose()).
        """
        self.startup=True        # No input line has been read yet
        self.scope_p=False       # Currently inside an open <p>
        self.scope_ul=False      # Currently inside an open <ul>
        self.scope_li=False      # Currently inside an open <li>
        self.done_title=False    # The first "===" heading (the title) has been emitted
        self.skipnextline=False  # The next line is a heading underline: swallow it
        self.mode=m
        self.stringify=s

    def dispose(self,l):
        """Write one line of markup to the output stream.

        In stringify mode the line is wrapped in double quotes, embedded
        double quotes are backslash-escaped, and a literal backslash-n is
        appended inside the quotes.  Otherwise the line is written as is.
        A real newline terminates the output in both cases.
        """
        if self.stringify:
            # Paragraph text has its quotes turned into &quot; by
            # process_word(), but heading text is passed through raw.
            # NOTE(review): backslashes in l are not escaped here - confirm
            # the input text never contains any.
            self.output.write("\""+l.replace("\"","\\\"")+"\\n\"\n")
        else:
            self.output.write(l+"\n")

    def process_word(self,w):
        """Return word w with markup characters translated.

        In words of 3 or more characters "<" and ">" open and close an
        italic span.  Shorter words cannot hold a "<x>" span, so there the
        angle brackets are escaped instead, which allows a literal "<" or
        "<>" in the text.  Double quotes and ampersands are escaped in
        words of any length.
        """
        if len(w)<3:
            angle={"<":"&lt;",">":"&gt;"}
        else:
            angle={"<":"<i>",">":"</i>"}

        pieces=[]
        for c in w:
            if c in angle:
                pieces.append(angle[c])
            elif c=='"':
                pieces.append("&quot;")
            elif c=="&":
                pieces.append("&amp;")
            else:
                pieces.append(c)
        return "".join(pieces)

    def process_paragraph_text(self,txt):
        """Return the markup for one line of paragraph text.

        txt is expected to be stripped of surrounding whitespace.  A
        leading "-" starts a new list item (opening the <ul> if needed and
        closing any previous <li>), a leading "$" marks a line of code, and
        a trailing ":" forces a line break.  Updates scope_ul / scope_li.
        """
        is_code=False
        specialbreak=txt.endswith(":")   # Safe on empty text, unlike indexing
        r="  "

        if txt.startswith("-"):
            # Slicing (not indexing) so a lone "-" is handled rather than
            # raising IndexError; it is treated like the "-?" form.
            if txt[1:2]==" ":
                txt=txt[2:]
            else:
                specialbreak=True
            if self.scope_ul and self.scope_li:
                r+="</li>"
                self.scope_li=False
            if not self.scope_ul:
                r+="<ul>"
                self.scope_ul=True
            if not self.scope_li:
                r+="<li>"
                self.scope_li=True

        elif txt.startswith("$"):
            is_code=True
            r+="<code>"
            # Drop only the "$"; split() below discards the following
            # space, so "$foo" no longer loses its first character.
            txt=txt[1:]
            specialbreak=True

        for w in txt.split():
            r+=self.process_word(w)
            r+=" "
        if is_code:
            r+="</code>"
        if specialbreak:
            r+="<br>"
        return r

    def _underlined_with(self,ch):
        """True if the current line is 3+ chars and the next line is a run of ch of equal length."""
        return len(self.currline)>2 and self.nextline==ch*len(self.currline)

    def _close_paragraph(self):
        """Emit closing tags for any open list item, list and paragraph."""
        if self.scope_li:
            self.dispose("</li>")
            self.scope_li=False
        if self.scope_ul:
            self.dispose("</ul>")
            self.scope_ul=False
        self.dispose("</p>")
        self.scope_p=False

    def process(self,in_stream,out_stream):
        """Convert all of in_stream, writing the markup to out_stream.

        in_stream needs a readline() method and out_stream a write()
        method.  In "html" mode the output is wrapped in <html>...</html>;
        the first "="-underlined heading supplies the title.
        """
        self.output=out_stream
        self.input=in_stream

        if self.mode=="html":
            self.dispose("<html>")

        while True:

            # Advance the one-line lookahead window
            if self.startup:
                self.currline_raw=in_stream.readline()
                self.startup=False
            else:
                self.currline_raw=self.nextline_raw
            self.nextline_raw=in_stream.readline()

            # readline() returns an empty string only at end of input
            if not self.currline_raw:
                break

            # This line is the underline of a heading already emitted
            if self.skipnextline:
                self.skipnextline=False
                continue

            # Should track last line too
            self.currline=self.currline_raw.strip()
            self.nextline=self.nextline_raw.strip()

            if self._underlined_with("="):
                heading=string.capwords(self.currline)
                if self.done_title:
                    self.dispose("<h2>"+heading+"</h2>")
                else:
                    # The first "=" heading doubles as the document title
                    if self.mode=="html":
                        self.dispose("<head>")
                        self.dispose("<!--- AUTOMATICALLY GENERATED FILE : DO NOT EDIT --->")
                        self.dispose("<title>"+heading+"</title>")
                        self.dispose("</head>")
                        self.dispose("<body>")
                    elif self.mode=="qml":
                        self.dispose("<qt title='"+heading+"'>")
                    self.dispose("<h1>"+heading+"</h1>")
                    self.done_title=True
                self.skipnextline=True
            elif self._underlined_with("-"):
                self.dispose("<h3>"+string.capwords(self.currline)+"</h3>")
                self.skipnextline=True
            elif self.scope_p:
                if self.currline:
                    self.dispose(self.process_paragraph_text(self.currline))
                else:
                    # A blank line ends the paragraph
                    self._close_paragraph()
            elif self.currline:
                self.dispose("<p>")
                self.dispose(self.process_paragraph_text(self.currline))
                self.scope_p=True
            else:
                self.dispose("")

        # Input that does not end with a blank line would otherwise leave
        # the last paragraph (and any list inside it) unclosed.
        if self.scope_p:
            self._close_paragraph()

        if self.mode=="html":
            self.dispose("</body>")
            self.dispose("</html>")

#########################################
        
if __name__=='__main__':

    # Command-line flags:
    #   -html / -qml  select the document wrapper ("html" and "qml" are
    #                 alternatives; the last one given wins, and with
    #                 neither the mode stays None)
    #   -s            emit each output line as a quoted string literal
    # Any other argument is ignored.
    mode=None
    stringify=False
    for arg in sys.argv[1:]:
        if arg=="-qml":
            mode="qml"
        elif arg=="-html":
            mode="html"
        elif arg=="-s":
            stringify=True

    # Filter: text on stdin, markup on stdout
    t2m=TextToMarkup(mode,stringify)
    t2m.process(sys.stdin,sys.stdout)