1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
|
import numpy as np
import pandas as pd
from .pandas_vb_common import setup # noqa
# Names of the reduction methods used as the ``op`` parameter by the
# benchmark classes in this module.
ops = [
    'mean',
    'sum',
    'median',
    'std',
    'skew',
    'kurt',
    'mad',
    'prod',
    'sem',
    'var',
]
class FrameOps(object):
    """Benchmark DataFrame reductions on a 100000 x 4 frame of random data.

    Every case is described by four parameters: ``op`` (name of the
    reduction method looked up on the frame), ``dtype`` (type the random
    data is cast to), ``axis`` (forwarded to the reduction) and
    ``use_bottleneck`` (value written to the bottleneck switch).
    """

    # NOTE(review): goal_time / params / param_names look like attributes
    # read by the benchmark runner (asv-style) -- confirm against the harness.
    goal_time = 0.2
    params = [ops, ['float', 'int'], [0, 1], [True, False]]
    param_names = ['op', 'dtype', 'axis', 'use_bottleneck']

    def setup(self, op, dtype, axis, use_bottleneck):
        """Build the frame, set the bottleneck switch and bind the reduction.

        The bound method is stored on ``self.df_func`` so that ``time_op``
        only performs the call itself. ``axis`` is not used here.
        """
        df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except Exception:
            # Best-effort fallback when the option cannot be assigned
            # (presumably a pandas build without that option -- confirm):
            # write the module-level flag in nanops instead. ``Exception``
            # is caught rather than everything so that KeyboardInterrupt
            # and SystemExit still propagate.
            from pandas.core import nanops
            nanops._USE_BOTTLENECK = use_bottleneck
        self.df_func = getattr(df, op)

    def time_op(self, op, dtype, axis, use_bottleneck):
        """Call the reduction bound in ``setup`` along ``axis``."""
        self.df_func(axis=axis)
class FrameMultiIndexOps(object):
    """Benchmark DataFrame reductions called with ``level=`` on a MultiIndex.

    ``level`` is one of ``0``, ``1`` or ``[0, 1]``; ``op`` is the name of
    the reduction method looked up on the frame.
    """

    goal_time = 0.2
    params = ([0, 1, [0, 1]], ops)
    param_names = ['level', 'op']

    def setup(self, level, op):
        """Create a 4-column frame on a three-level index and bind ``op``.

        ``level`` is not used here; it is only consumed by ``time_op``.
        """
        level_values = [np.arange(size) for size in (10, 100, 100)]
        # The three code arrays together enumerate each of the
        # 10 * 100 * 100 level combinations exactly once.
        outer_codes = np.arange(10).repeat(10000)
        middle_codes = np.tile(np.arange(100).repeat(100), 10)
        inner_codes = np.tile(np.tile(np.arange(100), 100), 10)
        mi = pd.MultiIndex(
            levels=level_values,
            labels=[outer_codes, middle_codes, inner_codes],
        )
        noise = np.random.randn(len(mi), 4)
        frame = pd.DataFrame(noise, index=mi)
        self.df_func = getattr(frame, op)

    def time_op(self, level, op):
        """Call the reduction bound in ``setup`` with ``level=level``."""
        self.df_func(level=level)
class SeriesOps(object):
    """Benchmark Series reductions on 100000 random values.

    Every case is described by three parameters: ``op`` (name of the
    reduction method looked up on the series), ``dtype`` (type the random
    data is cast to) and ``use_bottleneck`` (value written to the
    bottleneck switch).
    """

    # NOTE(review): goal_time / params / param_names look like attributes
    # read by the benchmark runner (asv-style) -- confirm against the harness.
    goal_time = 0.2
    params = [ops, ['float', 'int'], [True, False]]
    param_names = ['op', 'dtype', 'use_bottleneck']

    def setup(self, op, dtype, use_bottleneck):
        """Build the series, set the bottleneck switch and bind the reduction.

        The bound method is stored on ``self.s_func`` so that ``time_op``
        only performs the call itself.
        """
        s = pd.Series(np.random.randn(100000)).astype(dtype)
        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except Exception:
            # Best-effort fallback when the option cannot be assigned
            # (presumably a pandas build without that option -- confirm):
            # write the module-level flag in nanops instead. ``Exception``
            # is caught rather than everything so that KeyboardInterrupt
            # and SystemExit still propagate.
            from pandas.core import nanops
            nanops._USE_BOTTLENECK = use_bottleneck
        self.s_func = getattr(s, op)

    def time_op(self, op, dtype, use_bottleneck):
        """Call the reduction bound in ``setup`` with no arguments."""
        self.s_func()
class SeriesMultiIndexOps(object):
    """Benchmark Series reductions called with ``level=`` on a MultiIndex.

    ``level`` is one of ``0``, ``1`` or ``[0, 1]``; ``op`` is the name of
    the reduction method looked up on the series.
    """

    goal_time = 0.2
    params = ([0, 1, [0, 1]], ops)
    param_names = ['level', 'op']

    def setup(self, level, op):
        """Create a series on a three-level index and bind ``op``.

        ``level`` is not used here; it is only consumed by ``time_op``.
        """
        level_values = [np.arange(size) for size in (10, 100, 100)]
        # The three code arrays together enumerate each of the
        # 10 * 100 * 100 level combinations exactly once.
        outer_codes = np.arange(10).repeat(10000)
        middle_codes = np.tile(np.arange(100).repeat(100), 10)
        inner_codes = np.tile(np.tile(np.arange(100), 100), 10)
        mi = pd.MultiIndex(
            levels=level_values,
            labels=[outer_codes, middle_codes, inner_codes],
        )
        noise = np.random.randn(len(mi))
        series = pd.Series(noise, index=mi)
        self.s_func = getattr(series, op)

    def time_op(self, level, op):
        """Call the reduction bound in ``setup`` with ``level=level``."""
        self.s_func(level=level)
class Rank(object):
    """Benchmark ``rank`` on 10**5 random values in a Series or DataFrame.

    ``constructor`` names the pandas type that wraps the values and
    ``pct`` is forwarded to ``rank``.
    """

    goal_time = 0.2
    params = [['DataFrame', 'Series'], [True, False]]
    param_names = ['constructor', 'pct']

    def setup(self, constructor, pct):
        """Store random values wrapped in ``pd.<constructor>`` on ``self.data``.

        ``pct`` is accepted but not used here.
        """
        sample = np.random.randn(10**5)
        container = getattr(pd, constructor)
        self.data = container(sample)

    def time_rank(self, constructor, pct):
        """Rank the stored data, forwarding ``pct``."""
        target = self.data
        target.rank(pct=pct)

    def time_average_old(self, constructor, pct):
        """Rank the stored data, then divide the result by its length."""
        target = self.data
        ranked = target.rank(pct=pct)
        ranked / len(target)
class Correlation(object):
    """Benchmark ``DataFrame.corr`` on a 1000 x 30 frame of random data.

    ``method`` is forwarded to ``corr`` and is one of ``'spearman'``,
    ``'kendall'`` or ``'pearson'``.
    """

    goal_time = 0.2
    params = ['spearman', 'kendall', 'pearson']
    param_names = ['method']

    def setup(self, method):
        """Store a fresh random frame on ``self.df``; ``method`` is unused."""
        noise = np.random.randn(1000, 30)
        self.df = pd.DataFrame(noise)

    def time_corr(self, method):
        """Compute the correlation of the stored frame using ``method``."""
        frame = self.df
        frame.corr(method=method)
|