| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 
 | import numpy as np
import pandas as pd
from pandas import (
    NA,
    Categorical,
    DataFrame,
    Float64Dtype,
    MultiIndex,
    Series,
    Timestamp,
    date_range,
)
try:
    from pandas.tseries.offsets import (
        Hour,
        Nano,
    )
except ImportError:
    # For compatibility with older versions
    from pandas.core.datetools import (
        Hour,
        Nano,
    )
class FromDicts:
    """Benchmarks for constructing a DataFrame from dict-based inputs."""

    def setup(self):
        """Build the labels and dict inputs used by the timing methods."""
        n_rows, n_cols = 5000, 50
        row_labels = [f"i-{i}" for i in range(n_rows)]
        col_labels = [f"i-{i}" for i in range(n_cols)]
        self.index = pd.Index(row_labels, dtype=object)
        self.columns = pd.Index(col_labels, dtype=object)

        # One random frame is the source of both dict representations.
        values = np.random.randn(n_rows, n_cols)
        source = DataFrame(values, index=self.index, columns=self.columns)
        self.data = source.to_dict()
        self.dict_list = source.to_dict(orient="records")

        # Nested dict with integer keys at both levels.
        self.data2 = {
            outer: {inner: float(inner) for inner in range(100)}
            for outer in range(2000)
        }

        # arrays which we won't consolidate
        self.dict_of_categoricals = {
            key: Categorical(np.arange(n_rows)) for key in range(n_cols)
        }

    def time_list_of_dict(self):
        """Construct from the list of per-row record dicts."""
        DataFrame(data=self.dict_list)

    def time_nested_dict(self):
        """Construct from the nested dict alone."""
        DataFrame(data=self.data)

    def time_nested_dict_index(self):
        """Construct from the nested dict with an explicit index."""
        DataFrame(data=self.data, index=self.index)

    def time_nested_dict_columns(self):
        """Construct from the nested dict with explicit columns."""
        DataFrame(data=self.data, columns=self.columns)

    def time_nested_dict_index_columns(self):
        """Construct from the nested dict with explicit index and columns."""
        DataFrame(data=self.data, index=self.index, columns=self.columns)

    def time_nested_dict_int64(self):
        """Construct from the integer-keyed nested dict."""
        # nested dict, integer indexes, regression described in #621
        DataFrame(data=self.data2)

    def time_dict_of_categoricals(self):
        """Construct from the dict of Categorical arrays."""
        # dict of arrays that we won't consolidate
        DataFrame(data=self.dict_of_categoricals)
class FromSeries:
    """Benchmark for constructing a DataFrame from a MultiIndex-ed Series."""

    def setup(self):
        """Create a random Series indexed by a 100 x 100 product MultiIndex."""
        level = range(100)
        product_index = MultiIndex.from_product([level, level])
        values = np.random.randn(10000)
        self.s = Series(values, index=product_index)

    def time_mi_series(self):
        """Construct a DataFrame from the prepared Series."""
        DataFrame(data=self.s)
class FromDictwithTimestamp:
    """Benchmark for constructing a DataFrame from a date-indexed frame's dict."""

    params = [Nano(1), Hour(1)]
    param_names = ["offset"]

    def setup(self, offset):
        """Build the dict form of a random frame on a ``date_range`` index.

        ``offset`` is the frequency used to space the index entries.
        """
        n_rows = 10**3
        stamps = date_range(start=Timestamp("1/1/1900"), periods=n_rows, freq=offset)
        values = np.random.randn(n_rows, 10)
        self.d = DataFrame(values, index=stamps).to_dict()

    def time_dict_with_timestamp_offsets(self, offset):
        """Construct a DataFrame from the prepared dict."""
        DataFrame(data=self.d)
class FromRecords:
    """Benchmark for ``DataFrame.from_records`` reading from a generator."""

    params = [None, 1000]
    param_names = ["nrows"]

    # Generators get exhausted on use, so run setup before every call
    number = 1
    repeat = (3, 250, 10)

    def setup(self, nrows):
        """Create a fresh generator of 3-tuples for the timed call."""
        n_records = 100000
        self.gen = ((k, 20 * k, 100 * k) for k in range(n_records))

    def time_frame_from_records_generator(self, nrows):
        """Build a frame from the generator, passing ``nrows`` through."""
        # issue-6700
        self.df = DataFrame.from_records(data=self.gen, nrows=nrows)
class FromNDArray:
    """Benchmark for constructing a DataFrame from a 1-D NumPy array."""

    def setup(self):
        """Generate the random array used as constructor input."""
        n_values = 100000
        self.data = np.random.randn(n_values)

    def time_frame_from_ndarray(self):
        """Construct a DataFrame from the prepared array."""
        self.df = DataFrame(data=self.data)
class FromLists:
    """Benchmark for constructing a DataFrame from a list of lists."""

    goal_time = 0.2

    def setup(self):
        """Build 1000 independent rows, each holding the integers 0..99."""
        n_rows, n_cols = 1000, 100
        self.data = [list(range(n_cols)) for _ in range(n_rows)]

    def time_frame_from_lists(self):
        """Construct a DataFrame from the prepared list of lists."""
        self.df = DataFrame(data=self.data)
class FromRange:
    """Benchmark for constructing a DataFrame from a ``range`` object."""

    goal_time = 0.2

    def setup(self):
        """Store the one-million-element range used as constructor input."""
        n_values = 1_000_000
        self.data = range(n_values)

    def time_frame_from_range(self):
        """Construct a DataFrame from the prepared range."""
        self.df = DataFrame(data=self.data)
class FromScalar:
    """Benchmarks for broadcasting a scalar into a ``Float64Dtype`` DataFrame."""

    def setup(self):
        """Record the number of rows each constructed frame will have."""
        self.nrows = 100_000

    def time_frame_from_scalar_ea_float64(self):
        """Construct a three-column frame filled with the float ``1.0``."""
        DataFrame(
            data=1.0,
            index=range(self.nrows),
            columns=list("abc"),
            dtype=Float64Dtype(),
        )

    def time_frame_from_scalar_ea_float64_na(self):
        """Construct a three-column frame filled with ``NA``."""
        DataFrame(
            data=NA,
            index=range(self.nrows),
            columns=list("abc"),
            dtype=Float64Dtype(),
        )
class FromArrays:
    """Benchmarks for ``DataFrame._from_arrays`` with several array types."""

    goal_time = 0.2

    def setup(self):
        """Create the per-column arrays and the index/columns they pair with."""
        n_rows = n_cols = 1000

        def one_per_column(make_array):
            # Call the factory once for every column, in column order.
            return [make_array() for _ in range(n_cols)]

        # Generation order (float, sparse, int) is kept so the random stream
        # is consumed in the same sequence as before.
        self.float_arrays = one_per_column(lambda: np.random.randn(n_rows))
        self.sparse_arrays = one_per_column(
            lambda: pd.arrays.SparseArray(
                np.random.randint(0, 2, n_rows), dtype="float64"
            )
        )
        self.int_arrays = one_per_column(
            lambda: pd.array(np.random.randint(1000, size=n_rows), dtype="Int64")
        )
        self.index = pd.Index(range(n_rows))
        self.columns = pd.Index(range(n_cols))

    def _assemble(self, arrays):
        """Build a frame from ``arrays`` using the prepared index and columns."""
        return DataFrame._from_arrays(
            arrays,
            index=self.index,
            columns=self.columns,
            verify_integrity=False,
        )

    def time_frame_from_arrays_float(self):
        """Construct from the NumPy float arrays."""
        self.df = self._assemble(self.float_arrays)

    def time_frame_from_arrays_int(self):
        """Construct from the ``Int64`` extension arrays."""
        self.df = self._assemble(self.int_arrays)

    def time_frame_from_arrays_sparse(self):
        """Construct from the sparse arrays."""
        self.df = self._assemble(self.sparse_arrays)
from .pandas_vb_common import setup  # noqa: F401 isort:skip
 |