File: test_download.py

package info (click to toggle)
python-vega-datasets 0.9+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,128 kB
  • sloc: python: 623; makefile: 22
file content (82 lines) | stat: -rw-r--r-- 1,937 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd

import pytest

from vega_datasets import data
from vega_datasets.utils import connection_ok


# Reusable marker: skips network-dependent tests when no connection is available.
# Note connection_ok() is called once, at module import time — not per test.
skip_if_no_internet = pytest.mark.skipif(
    not connection_ok(), reason="No internet connection"
)


@skip_if_no_internet
def test_download_iris():
    """Download the iris dataset and verify its columns, plus the raw bytes form."""
    frame = data.iris(use_local=False)
    assert type(frame) is pd.DataFrame
    expected_columns = [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]
    assert sorted(frame.columns) == expected_columns

    raw = data.iris.raw(use_local=False)
    assert type(raw) is bytes


def test_stock_date_parsing():
    """Locally-bundled stocks data should parse the date column as datetimes."""
    frame = data.stocks()
    expected_dtypes = ["object", "datetime64[ns]", "float64"]
    assert (frame.dtypes == expected_dtypes).all()


def test_stock_pivoted():
    """Pivoted stocks data is indexed by date with one column per ticker symbol."""
    pivoted = data.stocks(pivoted=True)
    assert pivoted.index.name == "date"
    tickers = ["AAPL", "AMZN", "GOOG", "IBM", "MSFT"]
    assert sorted(pivoted.columns) == tickers


@skip_if_no_internet
def test_download_stock_parsing():
    """Remotely-fetched stocks data should parse with the same dtypes as local."""
    frame = data.stocks(use_local=False)
    expected_dtypes = ["object", "datetime64[ns]", "float64"]
    assert (frame.dtypes == expected_dtypes).all()


@skip_if_no_internet
def test_miserables_parsing():
    """Miserables loads as a tuple whose members are all DataFrames."""
    result = data.miserables()
    assert type(result) is tuple
    for member in result:
        assert type(member) is pd.DataFrame


@skip_if_no_internet
def test_us_10m_parsing():
    """The us_10m topology should parse from JSON into a dict."""
    topology = data.us_10m()
    assert type(topology) is dict


@skip_if_no_internet
def test_world_110m_parsing():
    """The world_110m topology should parse from JSON into a dict."""
    topology = data.world_110m()
    assert type(topology) is dict


@skip_if_no_internet
def test_unemployment_tsv():
    """The unemployment TSV should parse into exactly two columns."""
    frame = data.unemployment()
    assert frame.shape[1] == 2


@skip_if_no_internet
def test_zipcodes_parsing():
    """Zipcodes data should parse with the expected column names and dtypes."""
    frame = data.zipcodes()
    expected_columns = ["zip_code", "latitude", "longitude", "city", "state", "county"]
    expected_dtypes = ["object", "float64", "float64", "object", "object", "object"]
    assert (frame.columns == expected_columns).all()
    assert (frame.dtypes == expected_dtypes).all()