File: io_bench.py

package info (click to toggle)
pandas 0.13.1-2~bpo70%2B1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy-backports
  • size: 48,044 kB
  • sloc: python: 115,757; ansic: 11,490; sh: 311; makefile: 120
file content (133 lines) | stat: -rw-r--r-- 4,536 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from vbench.api import Benchmark
from datetime import datetime

# Import preamble shared by every benchmark in this module: each setup
# string below is built by appending benchmark-specific code to it.
common_setup = "from pandas_vb_common import *\n"

#----------------------------------------------------------------------
# read_csv

# Setup for the CSV read benchmark, in two parts:
#   1. build a 10000-row frame with two float columns, a constant string
#      column, a constant bool column and a random int column, indexed by
#      the values produced by rands(10);
#   2. write that frame to '__test__.csv'.
# The write lives in the setup code, so the benchmarked statement itself
# only performs the read.
setup1 = "".join([
    common_setup,
    """
index = [rands(10) for _ in xrange(10000)]
df = DataFrame({'float1' : randn(10000),
                'float2' : randn(10000),
                'string1' : ['foo'] * 10000,
                'bool1' : [True] * 10000,
                'int1' : np.random.randint(0, 100000, size=10000)},
               index=index)
""",
    "df.to_csv('__test__.csv')\n",
])

read_csv_standard = Benchmark(
    "read_csv('__test__.csv')",
    setup1,
    start_date=datetime(2011, 9, 15))


#----------------------------------------------------------------------
# write_csv

# Setup for the CSV write benchmark: build a 10000-row frame with two float
# columns, a constant string column, a constant bool column and a random
# int column. Nothing is written during setup; the to_csv call is the
# benchmarked statement.
setup2 = (
    common_setup
    + """
index = [rands(10) for _ in xrange(10000)]
df = DataFrame({'float1' : randn(10000),
                'float2' : randn(10000),
                'string1' : ['foo'] * 10000,
                'bool1' : [True] * 10000,
                'int1' : np.random.randint(0, 100000, size=10000)},
               index=index)
"""
)

write_csv_standard = Benchmark(
    "df.to_csv('__test__.csv')",
    setup2,
    start_date=datetime(2011, 9, 15))

#----------------------------------
# Write benchmark for a purely numeric frame: the setup code creates a
# 3000 x 30 frame from np.random.randn and the benchmarked statement dumps
# it to '__test__.csv'.
setup = common_setup + "\ndf = DataFrame(np.random.randn(3000, 30))\n"
frame_to_csv = Benchmark(
    "df.to_csv('__test__.csv')", setup,
    start_date=datetime(2011, 1, 1))
#----------------------------------

# Write benchmark for a 50000-row frame: column 'A' holds range(50000) and
# columns 'B', 'C', 'D' are derived from it by adding 1.0, 2.0 and 3.0.
# The leading and trailing empty entries reproduce the newlines that
# surround the setup code.
setup = common_setup + "\n".join([
    "",
    "df=DataFrame({'A':range(50000)})",
    "df['B'] = df.A + 1.0",
    "df['C'] = df.A + 2.0",
    "df['D'] = df.A + 3.0",
    "",
])
frame_to_csv2 = Benchmark(
    "df.to_csv('__test__.csv')", setup,
    start_date=datetime(2011, 1, 1))

#----------------------------------
# Write benchmark for a mixed-dtype frame. The setup code builds five
# 5000 x 5 blocks (float64, int64, bool, object and Timestamp) that share
# the float block's index, assigns NaN into a slice of the float block
# (.ix[30:500, 1:3]) and concatenates the blocks column-wise (axis=1).
# NOTE(review): the int block is created from np.random.randn output with
# dtype='int64', so it presumably holds truncated small integers -- confirm
# that this is the intended integer data for the benchmark.
setup = "".join((common_setup, """
from pandas import concat, Timestamp

def create_cols(name):
    return [ "%s%03d" % (name,i) for i in xrange(5) ]
df_float  = DataFrame(np.random.randn(5000, 5),dtype='float64',columns=create_cols('float'))
df_int    = DataFrame(np.random.randn(5000, 5),dtype='int64',columns=create_cols('int'))
df_bool   = DataFrame(True,index=df_float.index,columns=create_cols('bool'))
df_object = DataFrame('foo',index=df_float.index,columns=create_cols('object'))
df_dt     = DataFrame(Timestamp('20010101'),index=df_float.index,columns=create_cols('date'))

# add in some nans
df_float.ix[30:500,1:3] = np.nan

df        = concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)

"""))
frame_to_csv_mixed = Benchmark(
    "df.to_csv('__test__.csv')", setup,
    start_date=datetime(2012, 6, 1))

#----------------------------------------------------------------------
# parse dates, ISO8601 format

# Setup: format 1000 timestamps starting at 1/1/2000 as
# "%Y-%m-%d %H:%M:%S" strings and join them with newlines into one
# in-memory CSV payload named `data`.
setup = "".join([common_setup, """
rng = date_range('1/1/2000', periods=1000)
data = '\\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S")))
"""])

# Benchmarked statement: parse the payload through StringIO as a single
# headerless column 'foo' with date parsing enabled for that column.
stmt = "".join((
    "read_csv(StringIO(data), header=None, names=['foo'], ",
    "         parse_dates=['foo'])",
))
read_parse_dates_iso8601 = Benchmark(
    stmt, setup,
    start_date=datetime(2012, 3, 1))

# Setup: a frame whose values and index are both the same 1000-entry
# date range starting at 1/1/2000.
setup = common_setup + (
    "\n"
    "rng = date_range('1/1/2000', periods=1000)\n"
    "data = DataFrame(rng, index=rng)\n"
)

# Benchmarked statement: write the frame to '__test__.csv' with an
# explicit date_format of '%Y%m%d'.
stmt = "data.to_csv('__test__.csv', date_format='%Y%m%d')"

frame_to_csv_date_formatting = Benchmark(
    stmt, setup,
    start_date=datetime(2013, 9, 1))

#----------------------------------------------------------------------
# infer datetime format

# Setup: format 1000 timestamps starting at 1/1/2000 as
# "%Y-%m-%d %H:%M:%S" strings and join them with newlines into one
# in-memory CSV payload named `data`.
setup = common_setup + """
rng = date_range('1/1/2000', periods=1000)
data = '\\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S")))
"""

# Benchmarked statement: parse the payload as a single date column 'foo'
# with infer_datetime_format=True.
stmt = ("read_csv(StringIO(data), header=None, names=['foo'], "
        "         parse_dates=['foo'], infer_datetime_format=True)")

# Every other benchmark in this module declares a start_date; this one did
# not, although the statement relies on the infer_datetime_format keyword,
# which older pandas revisions do not accept.
# NOTE(review): the keyword shipped with pandas 0.13.1, so 2014-01-01 is
# used as a conservative lower bound -- confirm against the commit that
# introduced it.
read_csv_infer_datetime_format_iso8601 = Benchmark(
    stmt, setup,
    start_date=datetime(2014, 1, 1))

# Setup: format 1000 timestamps starting at 1/1/2000 as compact "%Y%m%d"
# strings and join them with newlines into one in-memory CSV payload
# named `data`.
setup = common_setup + """
rng = date_range('1/1/2000', periods=1000)
data = '\\n'.join(rng.map(lambda x: x.strftime("%Y%m%d")))
"""

# Benchmarked statement: parse the payload as a single date column 'foo'
# with infer_datetime_format=True.
stmt = ("read_csv(StringIO(data), header=None, names=['foo'], "
        "         parse_dates=['foo'], infer_datetime_format=True)")

# Every other benchmark in this module declares a start_date; this one did
# not, although the statement relies on the infer_datetime_format keyword,
# which older pandas revisions do not accept.
# NOTE(review): the keyword shipped with pandas 0.13.1, so 2014-01-01 is
# used as a conservative lower bound -- confirm against the commit that
# introduced it.
read_csv_infer_datetime_format_ymd = Benchmark(
    stmt, setup,
    start_date=datetime(2014, 1, 1))

# Setup: format 1000 timestamps starting at 1/1/2000 with the non-ISO
# pattern "%m/%d/%Y %H:%M:%S.%f" and join them with newlines into one
# in-memory CSV payload named `data`.
setup = common_setup + """
rng = date_range('1/1/2000', periods=1000)
data = '\\n'.join(rng.map(lambda x: x.strftime("%m/%d/%Y %H:%M:%S.%f")))
"""

# Benchmarked statement: parse the payload as a single date column 'foo'
# with infer_datetime_format=True.
stmt = ("read_csv(StringIO(data), header=None, names=['foo'], "
        "         parse_dates=['foo'], infer_datetime_format=True)")

# Every other benchmark in this module declares a start_date; this one did
# not, although the statement relies on the infer_datetime_format keyword,
# which older pandas revisions do not accept.
# NOTE(review): the keyword shipped with pandas 0.13.1, so 2014-01-01 is
# used as a conservative lower bound -- confirm against the commit that
# introduced it.
read_csv_infer_datetime_format_custom = Benchmark(
    stmt, setup,
    start_date=datetime(2014, 1, 1))