1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
from vbench.api import Benchmark
from datetime import datetime
# Prefix prepended to every benchmark setup string in this file.  It
# star-imports pandas_vb_common, presumably the source of names such as
# DataFrame, randn and rands that the setup snippets use without importing.
common_setup = "from pandas_vb_common import *\n"
#----------------------------------------------------------------------
# read_csv

# Setup: build a 10000-row frame with columns float1/float2 (randn),
# string1 (constant 'foo'), bool1 (constant True) and int1 (random ints
# below 100000), indexed by 10000 rands(10) values, then write it to
# '__test__.csv' so the timed statement has a file to parse.
setup1 = common_setup + "\n".join([
    "",
    "index = [rands(10) for _ in xrange(10000)]",
    "df = DataFrame({'float1' : randn(10000),",
    "'float2' : randn(10000),",
    "'string1' : ['foo'] * 10000,",
    "'bool1' : [True] * 10000,",
    "'int1' : np.random.randint(0, 100000, size=10000)},",
    "index=index)",
    "df.to_csv('__test__.csv')",
    "",
])

# Timed statement: read back the file written by the setup.
read_csv_standard = Benchmark(
    "read_csv('__test__.csv')", setup1, start_date=datetime(2011, 9, 15))
#----------------------------------------------------------------------
# write_csv

# Setup: the same 10000-row frame shape as the read_csv benchmark (float1,
# float2, string1, bool1, int1 under a rands(10) index), but nothing is
# written to disk here -- writing is the operation being timed.
setup2 = common_setup + (
    "\n"
    "index = [rands(10) for _ in xrange(10000)]\n"
    "df = DataFrame({'float1' : randn(10000),\n"
    "'float2' : randn(10000),\n"
    "'string1' : ['foo'] * 10000,\n"
    "'bool1' : [True] * 10000,\n"
    "'int1' : np.random.randint(0, 100000, size=10000)},\n"
    "index=index)\n"
)

# Timed statement: serialise the frame to '__test__.csv'.
write_csv_standard = Benchmark(
    "df.to_csv('__test__.csv')", setup2, start_date=datetime(2011, 9, 15))
#----------------------------------
# to_csv on a 3000 x 30 frame filled from np.random.randn
setup = common_setup + "\ndf = DataFrame(np.random.randn(3000, 30))\n"

frame_to_csv = Benchmark(
    "df.to_csv('__test__.csv')", setup, start_date=datetime(2011, 1, 1))
#----------------------------------
# to_csv on a 50000-row frame: column A is range(50000) and columns B, C
# and D are A shifted by 1.0, 2.0 and 3.0 respectively.
setup = common_setup + "\n".join([
    "",
    "df=DataFrame({'A':range(50000)})",
    "df['B'] = df.A + 1.0",
    "df['C'] = df.A + 2.0",
    "df['D'] = df.A + 3.0",
    "",
])

frame_to_csv2 = Benchmark(
    "df.to_csv('__test__.csv')", setup, start_date=datetime(2011, 1, 1))
#----------------------------------
# to_csv on a frame that mixes float, int, bool, object and datetime columns.
#
# The setup concatenates five 5000 x 5 frames column-wise:
#   df_float  - np.random.randn values as float64, with a block set to NaN
#   df_int    - np.random.randn values constructed with dtype='int64'
#   df_bool   - constant True
#   df_object - constant 'foo'
#   df_dt     - constant Timestamp('20010101')
# Column labels come from create_cols, e.g. 'float000' .. 'float004'.
#
# The setup text is Python source in its own right, so the body of
# create_cols must be indented inside the string; an unindented `return`
# makes the setup fail with IndentationError before anything is timed.
setup = common_setup + """
from pandas import concat, Timestamp
def create_cols(name):
    return [ "%s%03d" % (name,i) for i in xrange(5) ]
df_float = DataFrame(np.random.randn(5000, 5),dtype='float64',columns=create_cols('float'))
df_int = DataFrame(np.random.randn(5000, 5),dtype='int64',columns=create_cols('int'))
df_bool = DataFrame(True,index=df_float.index,columns=create_cols('bool'))
df_object = DataFrame('foo',index=df_float.index,columns=create_cols('object'))
df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=create_cols('date'))
# add in some nans
df_float.ix[30:500,1:3] = np.nan
df = concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)
"""

# Timed statement: serialise the mixed-dtype frame to '__test__.csv'.
frame_to_csv_mixed = Benchmark(
    "df.to_csv('__test__.csv')", setup, start_date=datetime(2012, 6, 1))
#----------------------------------------------------------------------
# parse dates, ISO8601 format

# Setup: format the 1000 entries of date_range('1/1/2000', periods=1000) as
# "%Y-%m-%d %H:%M:%S" and join them one per line into a single string.
# A raw string is used so the embedded '\n' reads exactly as it executes.
setup = common_setup + r"""
rng = date_range('1/1/2000', periods=1000)
data = '\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S")))
"""

# Timed statement: parse that text as a single date column named 'foo'.
# The doubled space after "names=['foo']," is part of the statement text and
# is kept verbatim.
stmt = ("read_csv(StringIO(data), header=None, "
        "names=['foo'],  parse_dates=['foo'])")

read_parse_dates_iso8601 = Benchmark(
    stmt, setup, start_date=datetime(2012, 3, 1))
# to_csv with an explicit date_format: the frame holds the 1000 entries of
# date_range('1/1/2000', periods=1000) both as its data and as its index.
setup = common_setup + (
    "\n"
    "rng = date_range('1/1/2000', periods=1000)\n"
    "data = DataFrame(rng, index=rng)\n"
)

# Timed statement: write the frame with dates rendered as '%Y%m%d'.
stmt = "data.to_csv('__test__.csv', date_format='%Y%m%d')"

frame_to_csv_date_formatting = Benchmark(
    stmt, setup, start_date=datetime(2013, 9, 1))
#----------------------------------------------------------------------
# infer datetime format

# Case 1: "%Y-%m-%d %H:%M:%S" text.  The setup formats the 1000 entries of
# date_range('1/1/2000', periods=1000) and joins them one per line; the raw
# string keeps the embedded '\n' looking exactly as it executes.
setup = common_setup + r"""
rng = date_range('1/1/2000', periods=1000)
data = '\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S")))
"""

# Timed statement: parse the 'foo' column with infer_datetime_format=True.
# The doubled space after "names=['foo']," is kept verbatim.
stmt = ("read_csv(StringIO(data), header=None, "
        "names=['foo'],  parse_dates=['foo'], "
        "infer_datetime_format=True)")

read_csv_infer_datetime_format_iso8601 = Benchmark(stmt, setup)
# Datetime-format inference on compact "%Y%m%d" text: the setup formats the
# 1000 entries of date_range('1/1/2000', periods=1000) and joins them one
# per line.  Raw string, so the embedded '\n' reads as it executes.
setup = common_setup + r"""
rng = date_range('1/1/2000', periods=1000)
data = '\n'.join(rng.map(lambda x: x.strftime("%Y%m%d")))
"""

# Timed statement: parse the 'foo' column with infer_datetime_format=True.
# The doubled space after "names=['foo']," is kept verbatim.
stmt = ("read_csv(StringIO(data), header=None, "
        "names=['foo'],  parse_dates=['foo'], "
        "infer_datetime_format=True)")

read_csv_infer_datetime_format_ymd = Benchmark(stmt, setup)
# Datetime-format inference on "%m/%d/%Y %H:%M:%S.%f" text: the setup
# formats the 1000 entries of date_range('1/1/2000', periods=1000) and joins
# them one per line.  Raw string, so the embedded '\n' reads as it executes.
setup = common_setup + r"""
rng = date_range('1/1/2000', periods=1000)
data = '\n'.join(rng.map(lambda x: x.strftime("%m/%d/%Y %H:%M:%S.%f")))
"""

# Timed statement: parse the 'foo' column with infer_datetime_format=True.
# The doubled space after "names=['foo']," is kept verbatim.
stmt = ("read_csv(StringIO(data), header=None, "
        "names=['foo'],  parse_dates=['foo'], "
        "infer_datetime_format=True)")

read_csv_infer_datetime_format_custom = Benchmark(stmt, setup)
|