1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
|
"""A helper to download input files needed by assorted encoding tests.
fetch_data_files.py [directory]
Files are downloaded to directory `directory`. If a directory isn't given,
it defaults to the current directory (.).
"""
# URLs of the data files to download, one per line.  The string is handed
# to main(), which breaks it into individual URLs with str.split(), so any
# whitespace (including the leading/trailing newlines) acts as a separator.
DATA_URLS = """
http://people.freebsd.org/~perky/i18n/BIG5HKSCS.TXT
http://people.freebsd.org/~perky/i18n/EUC-CN.TXT
http://people.freebsd.org/~perky/i18n/EUC-JISX0213.TXT
http://people.freebsd.org/~perky/i18n/EUC-JP.TXT
http://people.freebsd.org/~perky/i18n/EUC-KR.TXT
http://people.freebsd.org/~perky/i18n/SHIFT_JISX0213.TXT
http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT
http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT
http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
http://www.unicode.org/Public/3.2-Update/NormalizationTest-3.2.0.txt
http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT
http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/JOHAB.TXT
http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT
"""
# Adapted from test_support.open_urlresource() in Python 2.5.
# Fetch the file given by `url` off the web, and store it in directory
# `directory`. The file name is extracted from the last URL component.
# If the file already exists, it's not fetched again.
def fetch_file_from_url(url, directory):
    """Download `url` into `directory` unless the file is already there.

    The local file name is the last component of the URL's path.  If a
    file of that name already exists in `directory`, nothing is fetched.

    The data is first written to a temporary ``<name>.part`` file and is
    only renamed to its final name once the transfer has finished.  This
    way an interrupted download never leaves a truncated file behind that
    a later run would skip as "already exists".

    Any error raised by the download or the rename propagates to the
    caller.
    """
    import os
    try:
        # Python 3 module layout.
        from urllib.parse import urlparse
        from urllib.request import urlretrieve
    except ImportError:
        # Python 2 module layout.
        from urlparse import urlparse
        from urllib import urlretrieve

    filename = urlparse(url)[2].split('/')[-1]  # '/': it's a URL!
    target = os.path.join(directory, filename)
    if os.path.exists(target):
        print("\tskipping %r -- already exists" % target)
        return

    print("\tfetching %s ..." % url)
    partial = target + ".part"
    try:
        urlretrieve(url, partial)
        os.rename(partial, target)
    finally:
        # The partial file is still around only if something went wrong
        # before the rename completed; don't leave it lying about.
        if os.path.exists(partial):
            os.remove(partial)
def main(urls, directory):
    """Fetch every URL named in `urls` into `directory`.

    `urls` is a single string holding whitespace-separated URLs; each one
    is passed in turn to fetch_file_from_url().
    """
    print("Downloading data files to %r" % directory)
    for location in urls.split():
        fetch_file_from_url(location, directory)
if __name__ == "__main__":
    import sys

    # sys.argv[0] is the script itself; at most one further argument (the
    # destination directory) is accepted.
    argc = len(sys.argv)
    if argc not in (1, 2):
        raise ValueError("no more than one argument allowed")

    # Without an explicit directory, download into the current one.
    directory = sys.argv[1] if argc == 2 else "."
    main(DATA_URLS, directory)
|