1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import cgi, sys, codecs, time, random, os
from ludevit_trans import translator
from fetch import translate_page
# Directory that receives one log file per logged request; an empty value
# switches logging off (checked at the top of writelog).
logdir = '/var/log/ludevit/'


def writelog(text):
    """Write text to a fresh logfile inside ``logdir`` (best effort).

    text is a plain 8-bit string, not unicode.

    The file name is the current UTC time plus a random two-digit hex
    suffix.  The first line of the file is the REMOTE_ADDR environment
    variable (empty when unset), followed by ``text``.

    Nothing is written when ``logdir`` or ``text`` is empty.  Filesystem
    errors are swallowed on purpose: a logging problem must not break the
    request being served.  Always returns None.
    """
    if not logdir or not text:
        return
    remote_addr = os.environ.get('REMOTE_ADDR', '')
    try:
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        stamp = time.strftime('%Y%m%d_%H%M%S', time.gmtime())
        fname = stamp + '_%02x' % random.randint(0, 0xff)
        fname = os.path.join(logdir, fname)
        f = open(fname, 'w')
        try:
            f.write(remote_addr + '\n')
            f.write(text)
        finally:
            # release the handle even when one of the writes fails
            f.close()
    except (IOError, OSError):
        # os.makedirs reports failures as OSError, which on Python 2 is not
        # an IOError -- both must be caught to keep logging best-effort
        pass
    return
def get_user_agent():
    """Return the HTTP_USER_AGENT environment value, or a stand-in when unset."""
    fallback = 'Speccy/82 [en] (ZX Spectrum; U)'
    try:
        return os.environ['HTTP_USER_AGENT']
    except KeyError:
        return fallback
def header():
    """Return the opening part of the page as a unicode string.

    Covers the XHTML doctype, the <head> section (title, UTF-8 content
    type, favicon) and the opening <body> tag.
    """
    return u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="sk" xml:lang="sk">
<head>
<title>ludevít</title>
<meta http-equiv="Content-Type"
content="text/html; charset=utf-8" />
<link href="/pics/favicon-stur.ico" rel="shortcut icon" />
</head>
<body>
'''
def footer():
    """Return the closing part of the page as a unicode string.

    Holds the "see also" link, the contact address and the closing
    </body> and </html> tags.
    """
    closing_html = u'''
<p>
Pozri aj: <a href="/nrs/">Nauka reči Slovenskej</a>
</p>
<p>
Vaše názori posjelajťe na adresu <strong>ludevit @ juls.savba.sk</strong>
</p>
</body>
</html>
'''
    return closing_html
def form(text='', nfkd='none'):
    """Return the HTML input form as a unicode string.

    text -- initial content of the textarea; it is HTML-escaped
            (&, < and >) before being inserted.
    nfkd -- normalization mode; any value other than 'none' renders the
            NFKD checkbox pre-checked.
    """
    # cgi.escape no longer exists on Python 3.8+; html.escape(s, False)
    # escapes exactly the same three characters, so use it when available
    # and fall back to cgi.escape on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape
    text = escape(text, False)
    r = u'''
<form action="" method="post">
<textarea name="text" cols="70" rows="15" wrap="soft">%s</textarea>
''' % text
    if nfkd != 'none':
        # the page declares an XHTML doctype, which forbids minimized
        # attributes, so the value is spelled out
        checked = 'checked="checked"'
    else:
        checked = ''
    r += u'''
<br />
Alebo URL ktoruo chceťe preložiť:
<br/>
<input name="url" maxlength="500" size="70" value="http://" />
<br />
<input type="checkbox" name="nfkd" %s />Vísledok zobraz v NFKD normalizácii
<a href="/ludevit/why_nfkd.html">(prečo bi som mau?)</a>
''' % checked
    r += u'''
<br />
<input type="submit" value="prelož" />
</form>
'''
    return r
def is_valid_url(url):
    """Protection against malformed urls.

    Return False when url is longer than 2047 characters or contains a
    control character (any character whose code is below that of a
    space); return True otherwise.
    """
    # length restriction
    if len(url) > 2047:
        return False
    # and, an url should not contain control characters.  The character's
    # code has to be compared: comparing the character itself with an int
    # is always False on Python 2 (and a TypeError on Python 3), which
    # silently disabled this check.
    lowest_allowed = ord(' ')
    for c in url:
        if ord(c) < lowest_allowed:
            return False
    return True
def fix_url(url):
    """Return url repaired into something fetchable.

    A url typed with backslashes after ``http:`` has every backslash
    turned into a forward slash, and a url without any ``://`` gets
    ``http://`` put in front of it.
    """
    typed_with_backslashes = url.startswith(r'http:\\')  # stupid, stupid
    if typed_with_backslashes:
        url = '/'.join(url.split('\\'))
    if '://' in url:
        return url
    return 'http://' + url
def translation(text):
    """Wrap text in an HTML block ready to be printed.

    Return '' for empty input.  Otherwise the text is HTML-escaped
    (&, < and >), every newline becomes a <br /> tag, and the result is
    placed inside a <div> that is followed by a horizontal rule.
    """
    if not text:
        return ''
    # cgi.escape no longer exists on Python 3.8+; html.escape(s, False)
    # escapes exactly the same three characters, so use it when available
    # and fall back to cgi.escape on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape
    r = '''<div>
%s
</div><hr />''' % escape(text, False).replace('\n', '<br />')
    return r
def init_headers():
    """Switch stdout to UTF-8 and emit the HTTP header plus the page top.

    sys.stdout is replaced by a UTF-8 encoding writer wrapped around the
    current stdout.  The Content-type header, the blank line that ends the
    headers and the text returned by header() are then written to it.
    """
    utf8_out = codecs.getwriter('utf-8')(sys.stdout)
    sys.stdout = utf8_out
    utf8_out.write("Content-type: text/html\n")  # HTML is following
    utf8_out.write("\n")  # blank line, end of headers
    utf8_out.write(header())
    utf8_out.write("\n")
def formandfooter():
print form(text='', nfkd=nfkd)
print footer()
# ---------------------------------------------------------------------------
# Request handling: read the submitted fields and decide what to render.
#   * "text" given        -> translate it and show the result above the form
#   * usable "url" given  -> stream the output of translate_page() and exit
#   * anything else       -> show the introduction and the empty form
# ---------------------------------------------------------------------------
f = cgi.FieldStorage()
text = f.getfirst("text", "")
url = f.getfirst("url", "").strip()
do_nfkd = f.getfirst("nfkd", False)
# 'hack' is passed on when the NFKD checkbox was submitted, 'none' otherwise
nfkd = 'none'
if do_nfkd:
    nfkd = 'hack'
if text:
    # Submitted text wins over a submitted URL.  The text is logged as
    # received, before it is decoded and truncated below.
    writelog(text)
    init_headers()
    try:
        text = unicode(text, 'utf-8')[:40000] # safeguard
        t = translator.translate_text(text, nfkd)
    except UnicodeDecodeError:
        # The input was not valid UTF-8: show a user-facing explanation
        # in place of a translation.
        t = u'''
Text ňebou v UTF-8 koduvaní. Možno váš browser ňepodporuje
UTF-8. Všetki modernje browseri toto podporujú, skúsťe novú versiu. Ibažebi
sťe k stránke ňepristupovali z običajnjeho počítača, ale z dajakjeho
inšjeho zarjaďeňja, napriklad z PDA, kďe browseri často UTF-8 aňi žjadne
Slovenskje písmeni ňepodporujú. To je nám lúto.
'''
    print translation(t)
    formandfooter()
elif url and url!='http://' and is_valid_url(url):
    # 'http://' is the value the url field is pre-filled with, so it counts
    # as "no URL entered".
    url = fix_url(url)
    user_agent = get_user_agent()
    writelog('URL "%s" || "%s"' % (url, user_agent))
    # NOTE(review): no headers are printed in this branch, so translate_page
    # presumably yields the complete response including its headers --
    # confirm against fetch.translate_page.
    for c in translate_page(url, user_agent):
        sys.stdout.write(c)
    sys.exit(0)
else:
    # Nothing usable was submitted (this includes a URL rejected by
    # is_valid_url): show the introduction text and the empty form.
    init_headers()
    t = u'''
Toto je automatickí prekladač textu zo spisovnej Slovenčini do štúrovskej.
Napíšťe krátki text v spisovnom nárečí so správnou diakritikou a klikňiťe na «prelož».
'''
    print translation(t)
    formandfooter()
|