File: frame_ctor.py

package info (click to toggle)
pandas 0.13.1-2~bpo70%2B1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy-backports
  • size: 48,044 kB
  • sloc: python: 115,757; ansic: 11,490; sh: 311; makefile: 120
file content (57 lines) | stat: -rw-r--r-- 1,677 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from vbench.benchmark import Benchmark
from datetime import datetime

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# Creation from nested dict

setup = common_setup + """
N, K = 5000, 50
index = [rands(10) for _ in xrange(N)]
columns = [rands(10) for _ in xrange(K)]
frame = DataFrame(np.random.randn(N, K), index=index, columns=columns)

try:
    data = frame.to_dict()
except:
    data = frame.toDict()

some_dict = data.values()[0]
dict_list = [dict(zip(columns, row)) for row in frame.values]
"""

frame_ctor_nested_dict = Benchmark("DataFrame(data)", setup)

# From JSON-like stuff
frame_ctor_list_of_dict = Benchmark("DataFrame(dict_list)", setup,
                                    start_date=datetime(2011, 12, 20))

series_ctor_from_dict = Benchmark("Series(some_dict)", setup)

# nested dict, integer indexes, regression described in #621
setup = common_setup + """
data = dict((i,dict((j,float(j)) for j in xrange(100))) for i in xrange(2000))
"""
frame_ctor_nested_dict_int64 = Benchmark("DataFrame(data)", setup)

# from a mi-series
setup = common_setup + """
mi = MultiIndex.from_tuples([(x,y) for x in range(100) for y in range(100)])
s = Series(randn(10000), index=mi)
"""
frame_from_series = Benchmark("DataFrame(s)", setup)

#----------------------------------------------------------------------
# get_numeric_data

setup = common_setup + """
df = DataFrame(randn(10000, 25))
df['foo'] = 'bar'
df['bar'] = 'baz'
df = df.consolidate()
"""

frame_get_numeric_data = Benchmark('df._get_numeric_data()', setup,
                                   start_date=datetime(2011, 11, 1))