# File: eval.py
#
# Package info:
#   pandas 2.2.3+dfsg-9
#   links: PTS, VCS
#   area: main
#   in suites: forky, sid, trixie
#   size: 66,784 kB
#   sloc: python: 422,228; ansic: 9,190; sh: 270; xml: 102; makefile: 83
# File content: 65 lines | stat: -rw-r--r-- 1,988 bytes
import numpy as np

import pandas as pd

try:
    import pandas.core.computation.expressions as expr
except ImportError:
    import pandas.computation.expressions as expr


class Eval:
    """Benchmarks for ``pd.eval`` on element-wise expressions over four frames.

    Each benchmark is parametrized over the evaluation engine and over a
    ``threads`` setting: ``1`` pins numexpr to a single thread during
    ``setup``; any other value leaves the thread count untouched.
    """

    param_names = ["engine", "threads"]
    params = [["numexpr", "python"], [1, "all"]]

    def setup(self, engine, threads):
        """Create four 20000x100 random frames; pin numexpr threads if asked."""
        n_rows, n_cols = 20000, 100
        # Filled in this order so the random stream is consumed as
        # df, df2, df3, df4.
        for attr in ("df", "df2", "df3", "df4"):
            setattr(self, attr, pd.DataFrame(np.random.randn(n_rows, n_cols)))

        single_threaded = threads == 1
        if single_threaded:
            expr.set_numexpr_threads(1)

    def teardown(self, engine, threads):
        """Call ``set_numexpr_threads`` with no argument after every run.

        NOTE(review): presumably this restores the default thread count --
        confirm against ``pandas.core.computation.expressions``.
        """
        expr.set_numexpr_threads()

    # The expression strings below refer to ``self``; ``pd.eval`` is called
    # directly in each method so that ``self`` is a local of the calling frame.

    def time_add(self, engine, threads):
        """Time the sum of the four frames."""
        expression = "self.df + self.df2 + self.df3 + self.df4"
        pd.eval(expression, engine=engine)

    def time_mult(self, engine, threads):
        """Time the product of the four frames."""
        expression = "self.df * self.df2 * self.df3 * self.df4"
        pd.eval(expression, engine=engine)

    def time_and(self, engine, threads):
        """Time and-ing together the ``> 0`` masks of the four frames."""
        expression = (
            "(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)"
        )
        pd.eval(expression, engine=engine)

    def time_chained_cmp(self, engine, threads):
        """Time a chained ``<`` comparison across the four frames."""
        expression = "self.df < self.df2 < self.df3 < self.df4"
        pd.eval(expression, engine=engine)


class Query:
    """Benchmarks for ``DataFrame.query`` on a one-million-row frame.

    The frame is indexed by minute-frequency timestamps and carries a random
    float column ``a`` plus a ``dates`` column holding the same timestamps as
    the index.
    """

    def setup(self):
        """Build the frame and the scalars referenced by the query strings."""
        n_rows = 10**6
        minutes = pd.date_range("20010101", periods=n_rows, freq="min")

        # Timestamp just before the midpoint of the range; used as the cutoff
        # in the datetime queries.
        midpoint = n_rows // 2 - 1
        self.ts = pd.Series(minutes).iloc[midpoint]

        columns = {"a": np.random.randn(n_rows), "dates": minutes}
        self.df = pd.DataFrame(columns, index=minutes)

        # The bounds come from a second, independent random sample rather
        # than from column "a" itself.
        bounds_sample = np.random.randn(n_rows)
        self.min_val = bounds_sample.min()
        self.max_val = bounds_sample.max()

    # The query strings below use ``@self...`` references, so ``query`` is
    # called directly in each method, where ``self`` is a local name.

    def time_query_datetime_index(self):
        """Time filtering on the datetime index against ``self.ts``."""
        cutoff_on_index = "index < @self.ts"
        self.df.query(cutoff_on_index)

    def time_query_datetime_column(self):
        """Time filtering on the ``dates`` column against ``self.ts``."""
        cutoff_on_column = "dates < @self.ts"
        self.df.query(cutoff_on_column)

    def time_query_with_boolean_selection(self):
        """Time a two-sided range filter on column ``a``."""
        within_bounds = "(a >= @self.min_val) & (a <= @self.max_val)"
        self.df.query(within_bounds)


from .pandas_vb_common import setup  # noqa: F401 isort:skip