1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
import numpy as np
from pandas import (
DatetimeIndex,
Index,
MultiIndex,
Series,
Timestamp,
date_range,
)
def no_change(arr):
    """Identity formatter: hand back ``arr`` exactly as it was received."""
    return arr
def list_of_str(arr):
    """Cast ``arr`` to a string dtype and return its elements as a list."""
    as_text = arr.astype(str)
    # Unpack rather than ``.tolist()`` so the elements stay exactly what
    # iterating the converted array yields.
    return [*as_text]
def gen_of_str(arr):
    """Return a one-shot generator over the string-cast elements of ``arr``."""
    # The cast happens here, at call time; only the iteration is lazy.
    as_text = arr.astype(str)
    return (item for item in as_text)
def arr_dict(arr):
    """Map each position ``0 .. len(arr) - 1`` to the element at that position."""
    positions = range(len(arr))
    return {pos: value for pos, value in zip(positions, arr)}
def list_of_tuples(arr):
    """Return a list of ``(value, -value)`` tuples, one per element of ``arr``."""
    pairs = []
    for value in arr:
        pairs.append((value, -value))
    return pairs
def gen_of_tuples(arr):
    """Return a one-shot generator of ``(value, -value)`` tuples from ``arr``."""
    return ((value, -value) for value in arr)
def list_of_lists(arr):
    """Return a list of ``[value, -value]`` lists, one per element of ``arr``."""
    rows = []
    for value in arr:
        rows.append([value, -value])
    return rows
def list_of_tuples_with_none(arr):
    """Return ``(value, -value)`` tuples for ``arr`` with the last entry set to None.

    An empty ``arr`` yields ``[None]``.
    """
    pairs = [(value, -value) for value in arr]
    # Replace the trailing element (or the empty tail) with a single None.
    pairs[-1:] = [None]
    return pairs
def list_of_lists_with_none(arr):
    """Return ``[value, -value]`` lists for ``arr`` with the last entry set to None.

    An empty ``arr`` yields ``[None]``.
    """
    rows = [[value, -value] for value in arr]
    # Replace the trailing element (or the empty tail) with a single None.
    rows[-1:] = [None]
    return rows
class SeriesConstructors:
    """Time ``Series`` construction over input containers, indexing and dtype.

    The parameter grid is the product of: a callable that reshapes the source
    array into some container, whether an explicit index is supplied, and the
    dtype of the source array.
    """

    param_names = ["data_fmt", "with_index", "dtype"]
    params = [
        [
            no_change,
            list,
            list_of_str,
            gen_of_str,
            arr_dict,
            list_of_tuples,
            gen_of_tuples,
            list_of_lists,
            list_of_tuples_with_none,
            list_of_lists_with_none,
        ],
        [False, True],
        ["float", "int"],
    ]

    # A generator can only be consumed once, so every timed call needs a
    # fresh setup beforehand.
    number = 1
    repeat = (3, 250, 10)

    def setup(self, data_fmt, with_index, dtype):
        """Build ``self.data`` and ``self.index`` for one parameter combination.

        Raises NotImplementedError for generator inputs combined with an
        index (presumably treated as a skip by the benchmark runner).
        """
        is_generator_fmt = data_fmt in (gen_of_str, gen_of_tuples)
        if with_index and is_generator_fmt:
            raise NotImplementedError(
                "Series constructors do not support using generators with indexes"
            )

        size = 10**4
        source = np.random.randn(size) if dtype == "float" else np.arange(size)
        self.index = np.arange(size) if with_index else None
        self.data = data_fmt(source)

    def time_series_constructor(self, data_fmt, with_index, dtype):
        """Construct a ``Series`` from the prepared data and optional index."""
        Series(self.data, index=self.index)
class SeriesDtypesConstructors:
    """Time ``Index`` / ``DatetimeIndex`` construction from arrays and a Series."""

    def setup(self):
        """Prepare a float array, a small object array of strings and a
        Series of repeated timestamps."""
        size = 10**4
        self.arr = np.random.randn(size)
        self.arr_str = np.array(["foo", "bar", "baz"], dtype=object)

        stamps = [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")]
        # Three timestamps tiled size * 10 times.
        self.s = Series(stamps * (size * 10))

    def time_index_from_array_string(self):
        """Build an ``Index`` from the object array of strings."""
        Index(self.arr_str)

    def time_index_from_array_floats(self):
        """Build an ``Index`` from the float array."""
        Index(self.arr)

    def time_dtindex_from_series(self):
        """Build a ``DatetimeIndex`` from the timestamp Series."""
        DatetimeIndex(self.s)

    def time_dtindex_from_index_with_series(self):
        """Build a plain ``Index`` from the timestamp Series."""
        Index(self.s)
class MultiIndexConstructor:
    """Time ``MultiIndex.from_product`` on a string level and an integer range."""

    def setup(self):
        """Prepare the two iterables that are fed to ``from_product``."""
        N = 10**4
        labels = [f"i-{i}" for i in range(N)]
        label_index = Index(labels, dtype=object)
        self.iterables = [label_index, range(20)]

    def time_multiindex_from_iterables(self):
        """Build the product ``MultiIndex`` from the prepared iterables."""
        MultiIndex.from_product(self.iterables)
class DatetimeIndexConstructor:
    """Time ``DatetimeIndex`` construction from lists of date-like values."""

    def setup(self):
        """Derive four list representations of one daily date range."""
        n_periods = 20_000
        days = date_range("1900-01-01", periods=n_periods)

        self.list_of_str = days.strftime("%Y-%m-%d").tolist()
        self.list_of_datetimes = days.to_pydatetime().tolist()
        self.list_of_dates = days.date.tolist()
        self.list_of_timestamps = days.tolist()

    def time_from_list_of_timestamps(self):
        """Build a ``DatetimeIndex`` from the list produced by ``tolist()``."""
        DatetimeIndex(self.list_of_timestamps)

    def time_from_list_of_dates(self):
        """Build a ``DatetimeIndex`` from the list taken from ``.date``."""
        DatetimeIndex(self.list_of_dates)

    def time_from_list_of_datetimes(self):
        """Build a ``DatetimeIndex`` from the ``to_pydatetime()`` list."""
        DatetimeIndex(self.list_of_datetimes)

    def time_from_list_of_str(self):
        """Build a ``DatetimeIndex`` from ``%Y-%m-%d`` formatted strings."""
        DatetimeIndex(self.list_of_str)
from .pandas_vb_common import setup # noqa: F401 isort:skip
|