File: index.cgi

package info (click to toggle)
ludevit 6.1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 172 kB
  • ctags: 119
  • sloc: python: 947; makefile: 47
file content (166 lines) | stat: -rw-r--r-- 4,293 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/python
# -*- coding: UTF-8 -*- 

import cgi, sys, codecs, time, random, os

from ludevit_trans import translator
from fetch import translate_page

# Directory that receives one log file per request.  writelog() returns
# without doing anything when this is empty, so '' or None disables logging.
logdir='/var/log/ludevit/'


def writelog(text):
    """Write text to a fresh file in logdir, on a best-effort basis.

    text is a plain 8-bit string, not unicode.  The file is named after the
    current UTC time plus a random two-digit hex suffix; its first line is
    the client's REMOTE_ADDR (empty if unset), followed by text itself.

    Nothing is written when logdir or text is empty.  Logging must never
    break the request, so every file system error is swallowed.
    """
    if not logdir or not text:
        return
    remote_addr = os.environ.get('REMOTE_ADDR', '')
    if not os.path.isdir(logdir):
        try:
            os.makedirs(logdir)
        except OSError:
            # Either a concurrent request created the directory first (fine)
            # or it cannot be created at all (give up quietly).
            if not os.path.isdir(logdir):
                return
    stamp = time.strftime('%Y%m%d_%H%M%S', time.gmtime())
    fname = os.path.join(logdir, '%s_%02x' % (stamp, random.randint(0, 0xff)))
    try:
        f = open(fname, 'w')
        try:
            f.write(remote_addr + '\n')
            f.write(text)
        finally:
            # close the handle even when a write fails (e.g. disk full)
            f.close()
    except (IOError, OSError):
        # On Python 2 open()/write() raise IOError while os-level calls raise
        # OSError; neither may propagate out of the logger.
        pass
    return

def get_user_agent():
    "return the client's User-Agent header, or a stand-in when it is absent"
    fallback = 'Speccy/82 [en] (ZX Spectrum; U)'
    try:
        return os.environ['HTTP_USER_AGENT']
    except KeyError:
        return fallback


def header():
    "return the page prologue: doctype, the whole <head> and the opening <body>"
    prologue_lines = (
        u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
        u'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
        u'<html xmlns="http://www.w3.org/1999/xhtml" lang="sk" xml:lang="sk">',
        u'<head>',
        u'    <title>ludevít</title>',
        u'    <meta http-equiv="Content-Type"',
        u'        content="text/html; charset=utf-8" />',
        u'    <link href="/pics/favicon-stur.ico" rel="shortcut icon" />',
        u'',
        u'</head>',
        u'',
        u'<body>',
    )
    # every line, the last one included, is newline-terminated
    return u'\n'.join(prologue_lines) + u'\n'

def footer():
    "return the closing part of the page: related link, contact address, end tags"
    closing_html = u'''
<p>
Pozri aj: <a href="/nrs/">Nauka reči Slovenskej</a>
</p>
<p>
Vaše názori posjelajťe na adresu <strong>ludevit @ juls.savba.sk</strong>
</p>
</body>
</html>
'''
    return closing_html

def form(text='', nfkd='none'):
    """Return the HTML input form as a unicode string.

    text -- initial content of the textarea; it is HTML-escaped here
    nfkd -- NFKD mode; any value other than 'none' renders the NFKD
            checkbox pre-ticked
    """
    # saxutils.escape replaces &, < and > exactly like cgi.escape(text) did,
    # and it is available on every Python 2 and Python 3 release.
    from xml.sax.saxutils import escape
    text = escape(text)
    r = u'''
<form action="" method="post">
<textarea name="text" cols="70" rows="15" wrap="soft">%s</textarea>
''' % text
    if nfkd != 'none':
        # the page declares an XHTML doctype, which forbids minimized
        # attributes, so the value has to be spelled out
        checked = 'checked="checked"'
    else:
        checked = ''
    r += u'''
<br />
Alebo URL ktoruo chceťe preložiť:
<br/>
<input name="url" maxlength="500" size="70" value="http://" />
<br />
<input type="checkbox" name="nfkd" %s />Vísledok zobraz v NFKD normalizácii
<a href="/ludevit/why_nfkd.html">(prečo bi som mau?)</a>
''' % checked

    r += u'''
<br />
<input type="submit" value="prelož" />
</form>
'''
    return r

def is_valid_url(url):
    """Protection against malformed urls.

    Return False when url is longer than 2047 characters or contains a
    control character (anything below the space character), True otherwise.
    """
    # NOTE(review): the scheme is not checked here -- confirm that
    # translate_page refuses non-HTTP schemes such as file://.
    # length restriction
    if len(url) > 2047:
        return False
    # An url should not contain control characters.  Compare character with
    # character: a str-to-int comparison is never true on Python 2.
    if any(c < ' ' for c in url):
        return False
    return True

def fix_url(url):
    """Repair common mistakes in a user-typed url and return the result.

    * 'http:' followed by two backslashes: every backslash in the url is
      turned into a forward slash.
    * no leading 'scheme://': 'http://' is prepended.
    """
    import re
    if url.startswith(r'http:\\'): # stupid, stupid
        url = url.replace('\\', '/')
    # Look for the scheme at the very start only: a '://' further in (for
    # instance inside a query string) does not make the url absolute.
    if not re.match(r'[A-Za-z][A-Za-z0-9+.-]*://', url):
        url = 'http://' + url
    return url

def translation(text):
    "wrap text in a <div> followed by <hr />, escaped and with <br /> line breaks"
    if text:
        escaped = cgi.escape(text)
        with_breaks = escaped.replace('\n', '<br />')
        return '<div>\n' + with_breaks + '\n</div><hr />'
    # nothing to show
    return ''

def init_headers():
    """Switch stdout to UTF-8 and start the response.

    Replaces sys.stdout with a writer that encodes everything to UTF-8, then
    emits the HTTP header block followed by the page prologue from header().
    """
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
    # Announce the encoding in the HTTP header too: a charset sent by the
    # server takes precedence over the <meta> element inside the page.
    sys.stdout.write("Content-Type: text/html; charset=utf-8\n")
    sys.stdout.write("\n")          # blank line, end of headers
    sys.stdout.write(header())
    sys.stdout.write("\n")

def formandfooter():
    print form(text='', nfkd=nfkd)
    print footer()

# --- request handling: runs once per CGI invocation ---
# Read the submitted form fields.
f = cgi.FieldStorage()

text = f.getfirst("text", "")
url = f.getfirst("url", "").strip()
# Any non-empty "nfkd" field switches NFKD output on.  The resulting mode is
# kept in the global nfkd, which formandfooter() reads as well.
do_nfkd = f.getfirst("nfkd", False)
nfkd = 'none'
if do_nfkd:
    nfkd = 'hack'

if text:
    # Text was submitted: log it, translate it and show the result above a
    # fresh form.  The raw, untruncated text is what gets logged.
    writelog(text)
    init_headers()
    try:
        text = unicode(text, 'utf-8')[:40000] # safeguard
        t = translator.translate_text(text, nfkd)
    except UnicodeDecodeError:
        # the submitted bytes are not valid UTF-8: show an explanation
        # instead of a translation
        t = u'''
Text ňebou v UTF-8 koduvaní. Možno váš browser ňepodporuje
UTF-8. Všetki modernje browseri toto podporujú, skúsťe novú versiu. Ibažebi
sťe k stránke ňepristupovali z običajnjeho počítača, ale z dajakjeho
inšjeho zarjaďeňja, napriklad z PDA, kďe browseri často UTF-8 aňi žjadne
Slovenskje písmeni ňepodporujú. To je nám lúto.
'''
    print translation(t)
    formandfooter()
elif url and url!='http://' and is_valid_url(url):
    # A URL was submitted ('http://' alone is just the field's default
    # value): log it and relay whatever translate_page yields.
    # init_headers() is not called on this path, so presumably translate_page
    # produces the HTTP headers itself -- confirm in fetch.py.
    url = fix_url(url)
    user_agent = get_user_agent()
    writelog('URL "%s" || "%s"' % (url, user_agent))
    for c in translate_page(url, user_agent):
	sys.stdout.write(c)
    sys.exit(0)
else:
    # Nothing usable was submitted: show the introduction and the form.
    init_headers()
    t = u'''
Toto je automatickí prekladač textu zo spisovnej Slovenčini do štúrovskej.
Napíšťe krátki text v spisovnom nárečí so správnou diakritikou a klikňiťe na «prelož».
'''
    print translation(t)
    formandfooter()