1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
"""
Tool to download all local datasets and save them within the vega_datasets
source tree.
Usage:
$ python download_datasets.py
"""
import json
from os.path import abspath, join, dirname
import sys
from urllib.request import urlretrieve
import json
sys.path.insert(1, abspath(join(dirname(__file__), "..")))
from vega_datasets.core import Dataset
# Names of the datasets that this script downloads into the package's
# ``_data`` directory; each name is passed to ``Dataset`` to resolve its URL
# and filename.
DATASETS_TO_DOWNLOAD = [
    "airports",
    "anscombe",
    "barley",
    "burtin",
    "cars",
    "crimea",
    "driving",
    "iowa-electricity",
    "iris",
    "la-riots",
    "ohlc",
    "seattle-temps",
    "seattle-weather",
    "sf-temps",
    "stocks",
    "us-employment",
    "wheat",
]
def _download_datasets():
    """Fetch every dataset in DATASETS_TO_DOWNLOAD into the package source.

    For each name, a ``Dataset`` is built to obtain its ``url`` and
    ``filename``; the file is downloaded to ``vega_datasets/_data/<filename>``
    (resolved relative to this script's parent directory).  After all
    downloads, a JSON index mapping each dataset name to its
    ``_data/<filename>`` path is written to
    ``vega_datasets/local_datasets.json``.
    """
    # Package directory relative to this script; made absolute at each use.
    package_dir = join(dirname(__file__), "..", "vega_datasets")

    index = {}
    for dataset_name in DATASETS_TO_DOWNLOAD:
        dataset = Dataset(dataset_name)
        source_url = dataset.url
        target = abspath(join(package_dir, "_data", dataset.filename))
        print("retrieving data {0} -> {1}".format(source_url, target))
        urlretrieve(source_url, target)
        # Record the package-relative location only after a successful fetch.
        index[dataset_name] = "_data/{0}".format(dataset.filename)

    index_path = abspath(join(package_dir, "local_datasets.json"))
    with open(index_path, "w") as out:
        # Sorted keys keep the generated index stable between runs.
        json.dump(index, out, indent=2, sort_keys=True)
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    _download_datasets()
|