File: generate_datasets_json.py

package info (click to toggle)
python-vega-datasets 0.9%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,128 kB
  • sloc: python: 623; makefile: 22
file content (54 lines) | stat: -rw-r--r-- 1,655 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""Tool to re-generate and save vega_datasets/datasets.json

Usage:
$ python generate_datasets_json.py v1.24.0

The single command-line argument (``v1.24.0`` in the example above) is the
name of the desired version tag within http://github.com/vega/vega-datasets/
"""

import json
import os
import re
import subprocess
import sys


def _collect_datasets(data_dir):
    """Return a ``{name: {"filename": ..., "format": ...}}`` map for *data_dir*.

    ``name`` is the filename without its extension and ``format`` is the
    extension without the leading dot.
    """
    datasets = {}
    # Iterate in sorted order: os.listdir() order is arbitrary, and when two
    # files share a stem the later one overwrites the earlier entry, so
    # sorting makes the winner reproducible across machines.
    for filename in sorted(os.listdir(data_dir)):
        name, fmt = os.path.splitext(filename)
        datasets[name] = {"filename": filename, "format": fmt[1:]}
    return datasets


def _set_source_tag(core_file, tag):
    """Rewrite every ``SOURCE_TAG...`` occurrence in *core_file* for *tag*.

    On each line containing ``SOURCE_TAG``, everything from that name to the
    end of the line is replaced by ``SOURCE_TAG = <repr(tag)>``.
    """
    replacement = f"SOURCE_TAG = {tag!r}"
    # newline="" keeps the file's existing line endings untouched.
    with open(core_file, encoding="utf-8", newline="") as f:
        text = f.read()
    # A callable replacement inserts the tag literally, so characters such as
    # backslashes in the tag are never interpreted as regex escapes.
    text = re.sub(r"SOURCE_TAG.*", lambda _match: replacement, text)
    with open(core_file, "w", encoding="utf-8", newline="") as f:
        f.write(text)


def main(tag):
    """Regenerate ``datasets.json`` and ``SOURCE_TAG`` for a vega-datasets tag.

    Clones vega-datasets next to this script if no ``vega-datasets``
    directory exists there yet, checks out *tag*, writes one entry per file
    found in its ``data`` directory to ``../vega_datasets/datasets.json``,
    and updates ``SOURCE_TAG`` in ``../vega_datasets/core.py`` (both paths
    relative to this script's directory).

    Args:
        tag: Name of the vega-datasets version tag to check out.

    Raises:
        subprocess.CalledProcessError: If a git command exits non-zero.
        OSError: If the data directory, ``datasets.json`` or ``core.py``
            cannot be read or written.
    """
    # Resolve to an absolute path first: dirname() of a bare relative
    # __file__ is "", which subprocess rejects as a working directory.
    cwd = os.path.dirname(os.path.abspath(__file__))
    datasets_src = os.path.join(cwd, "vega-datasets")
    if not os.path.exists(datasets_src):
        print("Cloning vega-datsets...")
        # Clone over HTTPS so the transfer is encrypted and authenticated.
        subprocess.check_call(
            ["git", "clone", "https://github.com/vega/vega-datasets.git"], cwd=cwd
        )
    print(f"Checking out '{tag}'")
    subprocess.check_call(["git", "checkout", tag], cwd=datasets_src)

    data_dir = os.path.abspath(os.path.join(datasets_src, "data"))
    package_dir = os.path.abspath(os.path.join(cwd, "..", "vega_datasets"))
    datasets_file = os.path.join(package_dir, "datasets.json")
    core_file = os.path.join(package_dir, "core.py")

    print(f"Extracting datasets from {data_dir}")
    datasets = _collect_datasets(data_dir)

    print(f"Writing datsets to {datasets_file}")
    with open(datasets_file, "w", encoding="utf-8") as f:
        json.dump(datasets, f, indent=2, sort_keys=True)

    print("Updating SOURCE_TAG in core file")
    # Done in Python rather than via `sed -i .bak ...` + `rm`: a detached
    # backup suffix is BSD-only syntax and fails with GNU sed.
    _set_source_tag(core_file, tag)


if __name__ == "__main__":
    # Exactly one command-line argument (the tag) is expected; with any
    # other count, show the module docstring as the usage message.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(__doc__)
    else:
        main(cli_args[0])