1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
|
from vbench.api import Benchmark
from datetime import datetime
# Setup source shared by every benchmark in this module: builds `df`, a
# 10000 x 4 frame of random values indexed by a two-array MultiIndex.
common_setup = (
    "from pandas_vb_common import *\n"
    "index = MultiIndex.from_arrays([np.arange(100).repeat(100),\n"
    "np.roll(np.tile(np.arange(100), 100), 25)])\n"
    "df = DataFrame(np.random.randn(10000, 4), index=index)\n"
)

# Benchmarked statement: unstack index level 1 of the shared frame.
reshape_unstack_simple = Benchmark(
    'df.unstack(1)',
    common_setup,
    start_date=datetime(2011, 10, 1))
# Extend the shared setup with `udf`, the already-unstacked frame, so the
# benchmarked statement is just the stack() call.
setup = common_setup + "\nudf = df.unstack(1)\n"

reshape_stack_simple = Benchmark(
    'udf.stack()',
    setup,
    start_date=datetime(2011, 10, 1))
# Pivot benchmark: `unpivot` flattens a wide 10000 x 50 hourly frame into
# long (date, variable, value) records, and the benchmarked callable `f`
# pivots those records back to wide form.
#
# The setup source is executed as Python, so the body of `unpivot` must be
# indented inside the string; without it the setup raises IndentationError.
# `pivot` is called with keyword arguments because newer pandas releases no
# longer accept index/columns/values positionally.
setup = common_setup + """
def unpivot(frame):
    N, K = frame.shape
    data = {'value' : frame.values.ravel('F'),
            'variable' : np.asarray(frame.columns).repeat(N),
            'date' : np.tile(np.asarray(frame.index), K)}
    return DataFrame(data, columns=['date', 'variable', 'value'])
index = date_range('1/1/2000', periods=10000, freq='h')
df = DataFrame(randn(10000, 50), index=index, columns=range(50))
pdf = unpivot(df)
f = lambda: pdf.pivot(index='date', columns='variable', values='value')
"""

reshape_pivot_time_series = Benchmark('f()', setup,
                                      start_date=datetime(2012, 5, 1))
# Sparse key space, re: #2278
#
# Builds `idf`, a 1000-row frame indexed by five random integer key columns,
# redrawing the random data (at most 10 times) until every index entry is
# unique. The benchmarked statement then unstacks that index.
#
# The setup source is executed as Python, so the loop body and the nested
# `if` must be indented inside the string; without it the setup raises
# IndentationError. The loop counter is unused, so it is named `_` rather
# than shadowing the builtin `iter`.
setup = common_setup + """
NUM_ROWS = 1000
for _ in range(10):
    df = DataFrame({'A' : np.random.randint(50, size=NUM_ROWS),
                    'B' : np.random.randint(50, size=NUM_ROWS),
                    'C' : np.random.randint(-10,10, size=NUM_ROWS),
                    'D' : np.random.randint(-10,10, size=NUM_ROWS),
                    'E' : np.random.randint(10, size=NUM_ROWS),
                    'F' : np.random.randn(NUM_ROWS)})
    idf = df.set_index(['A', 'B', 'C', 'D', 'E'])
    if len(idf.index.unique()) == NUM_ROWS:
        break
"""

unstack_sparse_keyspace = Benchmark('idf.unstack()', setup,
                                    start_date=datetime(2011, 10, 1))
# Melt
#
# Setup builds a 10000-row frame with three random value columns (A, B, C)
# and two random integer id columns; the benchmarked statement melts it,
# keeping id1/id2 as identifier variables.
# NOTE(review): melt is imported from the internal pandas.core.reshape path;
# confirm that path exposes it for the pandas versions being benchmarked.
setup = common_setup + (
    "\n"
    "from pandas.core.reshape import melt\n"
    "df = DataFrame(np.random.randn(10000, 3), columns=['A', 'B', 'C'])\n"
    "df['id1'] = np.random.randint(0, 10, 10000)\n"
    "df['id2'] = np.random.randint(100, 1000, 10000)\n"
)

melt_dataframe = Benchmark(
    "melt(df, id_vars=['id1', 'id2'])",
    setup,
    start_date=datetime(2012, 8, 1))
|