1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
|
import numpy as np
import pandas as pd
class UniqueForLargePyObjectInts:
    """Time ``pd.unique`` on an object-dtype array of large Python ints."""

    def setup(self):
        """Build 5000 distinct ints, each a multiple of ``2**32``."""
        # dtype=object keeps the elements as Python ints inside the array.
        self.arr = np.array([i << 32 for i in range(5000)], dtype=object)

    def time_unique(self):
        """Run ``pd.unique`` over the prepared object array."""
        pd.unique(self.arr)
class Float64GroupIndex:
    """Time ``groupby(...).last()`` when the grouping key is a rounded index."""

    # GH28303
    def setup(self):
        """Create a 10**6-row frame and a rounded numeric key per row."""
        stamps = pd.date_range(start="1/1/2018", end="1/2/2018", periods=10**6)
        self.df = stamps.to_frame()
        # Integer cast of the datetime index, scaled down by 10**9 and rounded.
        # NOTE(review): presumably epoch nanoseconds -> whole seconds; confirm
        # the index resolution on the pandas version under test.
        scaled = self.df.index.astype(int) / 10**9
        self.group_index = np.round(scaled)

    def time_groupby(self):
        """Group the frame by the precomputed key and take the last row."""
        grouped = self.df.groupby(self.group_index)
        grouped.last()
class UniqueAndFactorizeArange:
    """Time ``pd.unique`` / ``pd.factorize`` on repeated, offset float ranges."""

    # One benchmark run per exponent; the offset added in setup is 10**exponent.
    params = range(4, 16)
    param_names = ["exponent"]

    def setup(self, exponent):
        """Build 10**4 distinct float64 values, each repeated 100 times."""
        base = np.arange(10**4, dtype="float64")
        offset = 10**exponent
        # Element-wise repeat: every value appears in a run of 100 copies.
        self.a2 = np.repeat(base + offset, 100)

    def time_factorize(self, exponent):
        """Run ``pd.factorize`` over the prepared array."""
        pd.factorize(self.a2)

    def time_unique(self, exponent):
        """Run ``pd.unique`` over the prepared array."""
        pd.unique(self.a2)
class Unique:
    """Time ``pd.unique`` on Series that contain ``pd.NA``."""

    params = ["Int64", "Float64"]
    param_names = ["dtype"]

    def setup(self, dtype):
        """Build one Series with repeated values and one with none."""
        # Three copies of [1, NA, 2, 0, 1, ..., 99_999]: every value repeats.
        with_repeats = [1, pd.NA, 2, *range(100_000)] * 3
        # 0..299_999 followed by a single NA: every value occurs once.
        no_repeats = [*range(300_000), pd.NA]
        self.ser = pd.Series(with_repeats, dtype=dtype)
        self.ser_unique = pd.Series(no_repeats, dtype=dtype)

    # NOTE(review): the timing methods receive the ``dtype`` parameter value
    # (see ``param_names``) but call it ``exponent``; the argument is unused.
    # The name is kept as-is so the signatures stay unchanged.
    def time_unique_with_duplicates(self, exponent):
        """Run ``pd.unique`` on the Series that has repeated values."""
        pd.unique(self.ser)

    def time_unique(self, exponent):
        """Run ``pd.unique`` on the Series whose values are all distinct."""
        pd.unique(self.ser_unique)
class NumericSeriesIndexing:
    """Time a ``.loc`` label slice on a sorted index with one repeated label."""

    params = [
        (np.int64, np.uint64, np.float64),
        (10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6),
    ]
    param_names = ["dtype", "N"]

    def setup(self, dtype, N):
        """Build an ``N``-row Series whose index holds the label 54 twice."""
        # 0..54, a second 54, then 55..N-2: N labels in non-decreasing order.
        labels = [*range(55), 54, *range(55, N - 1)]
        index = pd.Index(np.array(labels, dtype=dtype))
        self.data = pd.Series(np.arange(N), index=index)

    # NOTE(review): the first argument receives the ``dtype`` parameter value
    # (see ``param_names``) but is named ``index``; it is unused. The name is
    # kept as-is so the signature stays unchanged.
    def time_loc_slice(self, index, N):
        """Slice by label up to 800."""
        # The lookup forces the index mapping to be built.
        self.data.loc[:800]
class NumericSeriesIndexingShuffled:
    """Time a ``.loc`` label slice on a shuffled index with one repeated label."""

    params = [
        (np.int64, np.uint64, np.float64),
        (10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6),
    ]
    param_names = ["dtype", "N"]

    def setup(self, dtype, N):
        """Build an ``N``-row Series over randomly ordered labels."""
        # 0..54, a second 54, then 55..N-2: N labels in total.
        labels = np.array([*range(55), 54, *range(55, N - 1)], dtype=dtype)
        # In-place shuffle using NumPy's global random state.
        np.random.shuffle(labels)
        self.data = pd.Series(np.arange(N), index=pd.Index(labels))

    # NOTE(review): the first argument receives the ``dtype`` parameter value
    # (see ``param_names``) but is named ``index``; it is unused. The name is
    # kept as-is so the signature stays unchanged.
    def time_loc_slice(self, index, N):
        """Slice by label up to 800."""
        # The lookup forces the index mapping to be built.
        self.data.loc[:800]
|