File: stat_ops.py

package info (click to toggle)
pandas 0.23.3%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 167,704 kB
  • sloc: python: 230,826; ansic: 11,317; sh: 682; makefile: 133
file content (114 lines) | stat: -rw-r--r-- 3,324 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import numpy as np
import pandas as pd

from .pandas_vb_common import setup  # noqa


# Names of the reduction methods exercised by the *Ops benchmark classes in
# this module; each name is resolved with ``getattr`` on a DataFrame/Series.
ops = [
    'mean',
    'sum',
    'median',
    'std',
    'skew',
    'kurt',
    'mad',
    'prod',
    'sem',
    'var',
]


class FrameOps(object):
    """Time DataFrame reductions across op, dtype, axis and bottleneck use.

    ``params``/``param_names`` define the parameter grid; every combination
    is passed positionally to ``setup`` and ``time_op``.
    """

    goal_time = 0.2
    params = [ops, ['float', 'int'], [0, 1], [True, False]]
    param_names = ['op', 'dtype', 'axis', 'use_bottleneck']

    def setup(self, op, dtype, axis, use_bottleneck):
        """Build a 100000 x 4 frame of ``dtype`` and bind the method ``op``.

        Also switches the bottleneck flag to ``use_bottleneck``. ``axis`` is
        not used here; it is consumed by ``time_op``.
        """
        df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except Exception:
            # Fallback when the public option cannot be set (presumably
            # pandas versions that predate ``compute.use_bottleneck``).
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            from pandas.core import nanops
            nanops._USE_BOTTLENECK = use_bottleneck
        self.df_func = getattr(df, op)

    def time_op(self, op, dtype, axis, use_bottleneck):
        """Run the bound reduction along ``axis``; the result is discarded."""
        self.df_func(axis=axis)


class FrameMultiIndexOps(object):
    """Time level-wise reductions on a DataFrame with a 3-level MultiIndex."""

    goal_time = 0.2
    params = ([0, 1, [0, 1]], ops)
    param_names = ['level', 'op']

    def setup(self, level, op):
        """Create a 100000 x 4 frame on a 10 x 100 x 100 index; bind ``op``."""
        level_values = [np.arange(10), np.arange(100), np.arange(100)]
        # One integer label array per level, each 100000 entries long.
        outer_labels = np.repeat(np.arange(10), 10000)
        middle_labels = np.tile(np.repeat(np.arange(100), 100), 10)
        inner_labels = np.tile(np.tile(np.arange(100), 100), 10)
        index = pd.MultiIndex(
            levels=level_values,
            labels=[outer_labels, middle_labels, inner_labels],
        )
        values = np.random.randn(len(index), 4)
        frame = pd.DataFrame(values, index=index)
        self.df_func = getattr(frame, op)

    def time_op(self, level, op):
        """Run the bound reduction grouped by ``level``; result discarded."""
        self.df_func(level=level)


class SeriesOps(object):
    """Time Series reductions across op, dtype and bottleneck use.

    ``params``/``param_names`` define the parameter grid; every combination
    is passed positionally to ``setup`` and ``time_op``.
    """

    goal_time = 0.2
    params = [ops, ['float', 'int'], [True, False]]
    param_names = ['op', 'dtype', 'use_bottleneck']

    def setup(self, op, dtype, use_bottleneck):
        """Build a 100000-element Series of ``dtype`` and bind method ``op``.

        Also switches the bottleneck flag to ``use_bottleneck``.
        """
        s = pd.Series(np.random.randn(100000)).astype(dtype)
        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except Exception:
            # Fallback when the public option cannot be set (presumably
            # pandas versions that predate ``compute.use_bottleneck``).
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            from pandas.core import nanops
            nanops._USE_BOTTLENECK = use_bottleneck
        self.s_func = getattr(s, op)

    def time_op(self, op, dtype, use_bottleneck):
        """Run the bound reduction; the result is discarded."""
        self.s_func()


class SeriesMultiIndexOps(object):
    """Time level-wise reductions on a Series with a 3-level MultiIndex."""

    goal_time = 0.2
    params = ([0, 1, [0, 1]], ops)
    param_names = ['level', 'op']

    def setup(self, level, op):
        """Create a 100000-element Series on a 10 x 100 x 100 index; bind ``op``."""
        level_values = [np.arange(10), np.arange(100), np.arange(100)]
        # One integer label array per level, each 100000 entries long.
        outer_labels = np.repeat(np.arange(10), 10000)
        middle_labels = np.tile(np.repeat(np.arange(100), 100), 10)
        inner_labels = np.tile(np.tile(np.arange(100), 100), 10)
        index = pd.MultiIndex(
            levels=level_values,
            labels=[outer_labels, middle_labels, inner_labels],
        )
        values = np.random.randn(len(index))
        series = pd.Series(values, index=index)
        self.s_func = getattr(series, op)

    def time_op(self, level, op):
        """Run the bound reduction grouped by ``level``; result discarded."""
        self.s_func(level=level)


class Rank(object):
    """Time ``rank`` on a DataFrame or Series of 10**5 random values."""

    goal_time = 0.2
    params = [['DataFrame', 'Series'], [True, False]]
    param_names = ['constructor', 'pct']

    def setup(self, constructor, pct):
        """Build the container named by ``constructor`` from random data.

        ``pct`` is unused here; the timing methods consume it.
        """
        random_values = np.random.randn(10**5)
        container_type = getattr(pd, constructor)
        self.data = container_type(random_values)

    def time_rank(self, constructor, pct):
        """Rank the data, forwarding ``pct``; the result is discarded."""
        self.data.rank(pct=pct)

    def time_average_old(self, constructor, pct):
        """Rank the data, then divide by its length; result discarded."""
        ranked = self.data.rank(pct=pct)
        ranked / len(self.data)


class Correlation(object):
    """Time ``DataFrame.corr`` for each correlation ``method``."""

    goal_time = 0.2
    params = ['spearman', 'kendall', 'pearson']
    param_names = ['method']

    def setup(self, method):
        """Create a 1000 x 30 frame of random values; ``method`` is unused."""
        n_rows, n_cols = 1000, 30
        random_values = np.random.randn(n_rows, n_cols)
        self.df = pd.DataFrame(random_values)

    def time_corr(self, method):
        """Compute the correlation matrix with ``method``; result discarded."""
        self.df.corr(method=method)