1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
|
import urllib2
import urllib
import re
from sgmllib import SGMLParser
# Value sent as the HTTP Referer; the two derived forms below are built from it.
referer_url = "http://edu.kde.org/parley/"
referer = "Referer: %s" % referer_url          # raw header-line form
header_referer = ("Referer", referer_url)      # (name, value) pair form

# Endpoint queried by fetchTranslation().
url = "http://translate.google.com/translate_dict"
# Endpoint queried by googleTranslation() and googleTranslation2().
google_url = "http://ajax.googleapis.com/ajax/services/language/translate"

# Query-string parameters as (name, value) pairs, ready for urllib.urlencode().
param_version = ("v", "1.0")
param_lang_pair = ("langpair", "en|fr")
class myParser(SGMLParser):
def reset(self):
SGMLParser.reset(self)
self.words = []
self.mystack = []
self.spanstack = []
def unknown_starttag(self,tag,attrs):
self.mystack.append(tag)
print "unknown : ", tag, " ", len(self.mystack)
def start_span(self, attrs):
found = False
for name, value in attrs:
if name == "class" and value == "definition":
self.mystack.append("<translation>")
found = True
if not found:
self.mystack.append("span")
def report_unbalanced(self,tag):
print "unbalanced : ",tag
return
def handle_data(self,data):
if self.mystack[len(self.mystack)-1] == "<translation>":
print "data: ", data
self.words.append(data)
def unknown_endtag(self,tag):
#make it remove if self.mystack[len(self.mystack)-1] != tag
if len(self.mystack) > 0:
if self.mystack[len(self.mystack)-1] == tag:
print "end tag: ", self.mystack[len(self.mystack)-1]
self.mystack.pop()
def parserTest(data):
p = myParser()
p.feed(data)
print p.words
p.close()
def googleTranslation(word):
param_word_trn = ("q",word)
request_url = google_url + "?" + urllib.urlencode([param_version,param_word_trn,param_lang_pair])
print request_url
headers = urllib.urlencode([header_referer])
results = urllib2.urlopen(request_url,headers)
data = results.read()
def googleTranslation2(word):
param_word_trn = ("q",word)
request_url = google_url + "?" + urllib.urlencode([param_version,param_word_trn,param_lang_pair])
print request_url
r = urllib2.Request(request_url)
r.add_header("Referer",referer_url)
results = urllib2.urlopen(r)
data = results.read()
return data
def fetchTranslation(word):
param_word_trn = ("q",word)
request_url = url + "?" + urllib.urlencode([param_word_trn,param_lang_pair])
print request_url
results = urllib2.urlopen(request_url)
data = results.read()
#p = re.compile('<span\ class="definition">(?:<span class="comment">.*</span>)*([^<>]*)<span class="comment">',re.IGNORECASE)
p = re.compile('<span\ class="definition">([^<>]*)<span class="comment">',re.IGNORECASE)
l = p.findall(data)
parserTest(data)
return l
def returnlist():
    """Return a new list holding the integers 1 through 5."""
    return list(range(1, 6))
def testTuple():
    """Return a fixed list of two ``(first, second)`` string pairs."""
    firsts = ("first", "first1")
    seconds = ("second", "second1")
    return list(zip(firsts, seconds))
# Module-level call: runs whenever this file is executed or imported. It looks
# up "love" and prints the list returned by fetchTranslation().
print fetchTranslation("love")
|