File: stata.py

package info (click to toggle)
pandas 2.2.3+dfsg-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 66,784 kB
  • sloc: python: 422,228; ansic: 9,190; sh: 270; xml: 102; makefile: 83
file content (57 lines) | stat: -rw-r--r-- 1,839 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import numpy as np

from pandas import (
    DataFrame,
    Index,
    date_range,
    read_stata,
)

from ..pandas_vb_common import BaseIO


class Stata(BaseIO):
    """Benchmark reading and writing a Stata ``.dta`` file.

    The benchmark is parametrized over ``convert_dates``: each value is the
    Stata date format code applied to the frame's datetime index when the
    file is written.
    """

    params = ["tc", "td", "tm", "tw", "th", "tq", "ty"]
    param_names = ["convert_dates"]

    def setup(self, convert_dates):
        """Build the benchmark frame and write it once to ``self.fname``.

        Parameters
        ----------
        convert_dates : str
            Date format code used for the ``"index"`` column on export.
        """
        self.fname = "__test__.dta"
        self.N = 100000
        self.C = 5
        n_rows, n_floats = self.N, self.C

        # Float block on an hourly datetime index.
        float_block = np.random.randn(n_rows, n_floats)
        self.df = DataFrame(
            float_block,
            columns=[f"float{i}" for i in range(n_floats)],
            index=date_range("20000101", periods=n_rows, freq="h"),
        )

        # One string column stored with object dtype.
        labels = [f"i-{i}" for i in range(self.N)]
        self.df["object"] = Index(labels, dtype=object)

        # Integer columns drawn from (almost) the full range of each type.
        # The upper bound is lowered by 27 -- presumably to stay clear of the
        # values Stata reserves for missing-value codes; confirm against the
        # to_stata documentation.
        integer_columns = (
            ("int8_", np.int8),
            ("int16_", np.int16),
            ("int32_", np.int32),
        )
        for column, int_type in integer_columns:
            bounds = np.iinfo(int_type)
            self.df[column] = np.random.randint(
                bounds.min, bounds.max - 27, n_rows
            )

        self.df["float32_"] = np.random.randn(n_rows).astype(np.float32)

        # The index is exported under the column name "index".
        self.convert_dates = {"index": convert_dates}
        self.df.to_stata(self.fname, convert_dates=self.convert_dates)

    def time_read_stata(self, convert_dates):
        """Time reading back the file written by ``setup``."""
        read_stata(self.fname)

    def time_write_stata(self, convert_dates):
        """Time writing the frame with the parametrized date conversion."""
        self.df.to_stata(self.fname, convert_dates=self.convert_dates)


class StataMissing(Stata):
    """Variant of the Stata benchmark whose frame also holds NaN values."""

    def setup(self, convert_dates):
        """Extend the parent frame with ten NaN-bearing columns and rewrite it.

        Parameters
        ----------
        convert_dates : str
            Date format code forwarded to the parent ``setup``.
        """
        super().setup(convert_dates)
        for col_no in range(10):
            draws = np.random.randn(self.N)
            # Every negative draw is replaced by NaN.
            self.df[f"missing_{col_no}"] = np.where(draws < 0, np.nan, draws)
        # Rewrite the file so it includes the newly added columns.
        self.df.to_stata(self.fname, convert_dates=self.convert_dates)


from ..pandas_vb_common import setup  # noqa: F401 isort:skip