File: download_datasets.py

"""
Tool to download all local datasets and save them within the vega_datasets
source tree.

Usage:
$ python download_datasets.py
"""

import json
import sys
from os.path import abspath, dirname, join
from urllib.request import urlretrieve

# Make the in-tree vega_datasets package importable when this script is run
# from its own subdirectory.
sys.path.insert(1, abspath(join(dirname(__file__), "..")))
from vega_datasets.core import Dataset

# Names of the datasets to mirror into the package's _data directory.
DATASETS_TO_DOWNLOAD = [
    "airports",
    "anscombe",
    "barley",
    "burtin",
    "cars",
    "crimea",
    "driving",
    "iowa-electricity",
    "iris",
    "la-riots",
    "ohlc",
    "seattle-temps",
    "seattle-weather",
    "sf-temps",
    "stocks",
    "us-employment",
    "wheat",
]


def _download_datasets():
    """Download each listed dataset into the package source and rewrite the
    local_datasets.json listing that maps names to bundled files."""

    def filepath(*args):
        # Resolve a path inside the in-tree vega_datasets package directory.
        return abspath(join(dirname(__file__), "..", "vega_datasets", *args))

    dataset_listing = {}
    for name in DATASETS_TO_DOWNLOAD:
        data = Dataset(name)
        url = data.url
        filename = filepath("_data", data.filename)
        print("retrieving data {0} -> {1}".format(url, filename))
        urlretrieve(url, filename)
        dataset_listing[name] = "_data/{0}".format(data.filename)
    # Persist the name -> relative path mapping consumed by the package.
    with open(filepath("local_datasets.json"), "w") as f:
        json.dump(dataset_listing, f, indent=2, sort_keys=True)
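

# A minimal verification sketch (hypothetical helper, not part of the original
# tool): after _download_datasets() runs, reload local_datasets.json and report
# whether each listed file actually landed on disk.
def _verify_downloads():
    from os.path import exists, getsize  # local import; only this sketch needs it

    def filepath(*args):
        return abspath(join(dirname(__file__), "..", "vega_datasets", *args))

    with open(filepath("local_datasets.json")) as f:
        listing = json.load(f)
    for name, relpath in sorted(listing.items()):
        path = filepath(*relpath.split("/"))
        status = "{0} bytes".format(getsize(path)) if exists(path) else "MISSING"
        print("{0}: {1} ({2})".format(name, relpath, status))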


if __name__ == "__main__":
    _download_datasets()