File: test_local_datasets.py

package info (click to toggle)
python-vega-datasets 0.8+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,112 kB
  • sloc: python: 625; makefile: 21
file content (86 lines) | stat: -rw-r--r-- 2,190 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

from vega_datasets import data, local_data
from vega_datasets.core import Dataset


@pytest.mark.parametrize("name", Dataset.list_local_datasets())
def test_load_local_dataset(name):
    """Check that every access path to a local dataset yields the same result.

    For each name reported by ``Dataset.list_local_datasets()``, the dataset
    is fetched by call (``data(name)``, ``local_data(name)``) and by attribute
    (``data.<attr>()``, ``local_data.<attr>()``); the parsed frames and the
    raw payloads must all match.
    """
    # Attribute access uses underscores where the dataset name has hyphens.
    attr_name = name.replace("-", "_")
    via_attr = getattr(data, attr_name)
    via_local_attr = getattr(local_data, attr_name)

    # Parsed frames: compare everything against the ``data(name)`` result.
    reference = data(name)
    candidates = [
        via_attr(),  # equivalent to data.dataset_name()
        local_data(name),
        via_local_attr(),  # equivalent to local_data.dataset_name()
    ]
    for candidate in candidates:
        assert_frame_equal(reference, candidate)

    # Raw payloads: all four must be equal and must be exactly ``bytes``.
    payloads = [
        via_attr.raw(),
        via_local_attr.raw(),
        data(name, return_raw=True),
        local_data(name, return_raw=True),
    ]
    for left, right in zip(payloads, payloads[1:]):
        assert left == right
    for payload in payloads:
        assert type(payload) is bytes


def test_iris_column_names():
    """Check the iris loader's return types and its sorted column names."""
    expected_columns = [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    frame = data.iris()
    # Exact type match on purpose: a DataFrame subclass would not pass.
    assert type(frame) is pd.DataFrame
    actual_columns = list(frame.columns)
    actual_columns.sort()
    assert actual_columns == expected_columns

    # The raw accessor must hand back plain bytes.
    payload = data.iris.raw()
    assert type(payload) is bytes


def test_stocks_column_names():
    """Check the stocks loader's return types and its sorted column names."""
    expected_columns = ["date", "price", "symbol"]

    frame = data.stocks()
    # Exact type match on purpose: a DataFrame subclass would not pass.
    assert type(frame) is pd.DataFrame
    actual_columns = list(frame.columns)
    actual_columns.sort()
    assert actual_columns == expected_columns

    # The raw accessor must hand back plain bytes.
    payload = data.stocks.raw()
    assert type(payload) is bytes


def test_cars_column_names():
    """Check the cars loader's return types and its sorted column names."""
    expected_columns = [
        "Acceleration",
        "Cylinders",
        "Displacement",
        "Horsepower",
        "Miles_per_Gallon",
        "Name",
        "Origin",
        "Weight_in_lbs",
        "Year",
    ]

    frame = data.cars()
    # Exact type match on purpose: a DataFrame subclass would not pass.
    assert type(frame) is pd.DataFrame
    actual_columns = list(frame.columns)
    actual_columns.sort()
    assert actual_columns == expected_columns

    # The raw accessor must hand back plain bytes.
    payload = data.cars.raw()
    assert type(payload) is bytes


@pytest.mark.parametrize(
    "name,col",
    [
        ("cars", "Year"),
        ("stocks", "date"),
        ("iowa-electricity", "year"),
        ("seattle-weather", "date"),
        ("seattle-temps", "date"),
        ("sf-temps", "date"),
        ("ohlc", "date"),
    ],
)
def test_date_types(name, col):
    """Check that the given column of each dataset is loaded as datetime64.

    Parameters
    ----------
    name : str
        Dataset name passed to ``data(...)``.
    col : str
        Column of the loaded frame that is expected to hold datetimes.
    """
    dtype = data(name)[col].dtype
    # Check the dtype kind instead of comparing with "datetime64[ns]": the
    # resolution unit (ns/us/ms/s) can vary between pandas versions, while the
    # point of this test is only that the column was parsed as dates.
    # ``is_datetime64_dtype`` still rejects tz-aware and object columns.
    assert pd.api.types.is_datetime64_dtype(dtype), (
        "{}[{!r}] has dtype {}, expected a datetime64 dtype".format(
            name, col, dtype
        )
    )