1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
|
#!/usr/bin/python3
#
# Copyright 2022 UBports Foundation.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranties of
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
import requests, os, threading, queue
from bs4 import BeautifulSoup
from urllib import request
# Directory listing that links to one alternate-names .zip archive per entry.
url = "http://download.geonames.org/export/dump/alternatenames/"

# Bound the request so a stalled server cannot hang the script, and fail
# loudly on an HTTP error instead of parsing an error page as a listing.
res = requests.get(url, timeout=60)
res.raise_for_status()

so = BeautifulSoup(res.text, "html.parser")
pre = so.pre
if pre is None:
    raise RuntimeError("No <pre> listing found at {}".format(url))
all_a = pre.find_all("a")

# Final output directory and scratch directory for the downloaded archives.
os.makedirs("alternatenames", exist_ok=True)
os.makedirs("alternatenames_tmp", exist_ok=True)

# Queue of archive file names to be consumed by the worker threads.
q = queue.Queue()
for a in all_a:
    name = a.string
    # a.string is None when the anchor has no single text child; skip those
    # along with links that are not .zip archives.
    if name is None or not name.endswith(".zip"):
        continue
    q.put_nowait(name)
class Worker(threading.Thread):
    """Thread that downloads and unpacks archives named on a shared queue.

    Each queue item is a file name that is appended to the module-level
    ``url``, downloaded into ``alternatenames_tmp/`` and extracted into
    ``alternatenames/``.
    """

    def __init__(self, q, *args, **kwargs):
        """Store the work queue ``q``; other arguments are passed to Thread."""
        self.q = q
        super().__init__(*args, **kwargs)

    def _process(self, lang):
        """Download archive ``lang``, extract it, and remove the temp file."""
        from zipfile import ZipFile

        # url already ends with "/", so strip it to avoid a double slash.
        source = "{}/{}".format(url.rstrip("/"), lang)
        target = "alternatenames_tmp/{}".format(lang)
        try:
            print("Downloading {} from {} to {}".format(lang, source, target))
            request.urlretrieve(source, target)
            print("Done downloading {}".format(lang))
            print("Extracting {}".format(lang))
            with ZipFile(target, 'r') as zipObj:
                zipObj.extractall('alternatenames')
            print("Done extracting {}".format(lang))
        finally:
            # Remove the archive even after a failure so the scratch
            # directory can still be deleted once all work is finished.
            if os.path.exists(target):
                os.remove(target)

    def run(self):
        """Process queue items until the queue stays empty for 3 seconds."""
        from zipfile import BadZipFile

        while True:
            try:
                lang = self.q.get(timeout=3)
            except queue.Empty:
                return
            try:
                self._process(lang)
            except (OSError, BadZipFile) as err:
                # Network errors (URLError is an OSError), filesystem errors
                # and corrupt archives: report and move on to the next item.
                print("Failed to process {}: {}".format(lang, err))
            finally:
                # Always acknowledge the item; otherwise q.join() in the
                # main thread would block forever after a failure.
                self.q.task_done()
# Launch a pool of 20 downloader threads that all drain the shared queue.
started = 0
while started < 20:
    worker = Worker(q)
    worker.start()
    started += 1

# Block until every queued archive has been marked done, then drop the
# (by now empty) scratch directory.
q.join()
os.rmdir("alternatenames_tmp")
|