# Benchmarks for pd.eval and DataFrame.query.
import numpy as np
import pandas as pd
try:
import pandas.core.computation.expressions as expr
except ImportError:
import pandas.computation.expressions as expr
class Eval:
    """Time ``pd.eval`` on element-wise expressions over four DataFrames.

    Each benchmark is parameterised by the evaluation ``engine`` passed to
    ``pd.eval`` and by ``threads``: ``1`` pins numexpr to a single thread,
    while ``"all"`` leaves the thread count untouched.
    """

    params = [["numexpr", "python"], [1, "all"]]
    param_names = ["engine", "threads"]

    def setup(self, engine, threads):
        """Create four random 20000 x 100 frames and apply the thread setting."""
        shape = (20000, 100)
        self.df = pd.DataFrame(np.random.randn(*shape))
        self.df2 = pd.DataFrame(np.random.randn(*shape))
        self.df3 = pd.DataFrame(np.random.randn(*shape))
        self.df4 = pd.DataFrame(np.random.randn(*shape))

        # Only the single-thread case changes the setting; for "all" the
        # current numexpr thread count is left as it is.
        if threads == 1:
            expr.set_numexpr_threads(1)

    # NOTE: the expressions below refer to ``self``, so ``pd.eval`` has to be
    # called directly from the method body where ``self`` is a local name.

    def time_add(self, engine, threads):
        """Sum the four frames."""
        pd.eval("self.df + self.df2 + self.df3 + self.df4", engine=engine)

    def time_and(self, engine, threads):
        """AND together the ``> 0`` masks of the four frames."""
        pd.eval(
            "(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)",
            engine=engine,
        )

    def time_chained_cmp(self, engine, threads):
        """Evaluate a chained ``<`` comparison across the four frames."""
        pd.eval("self.df < self.df2 < self.df3 < self.df4", engine=engine)

    def time_mult(self, engine, threads):
        """Multiply the four frames together."""
        pd.eval("self.df * self.df2 * self.df3 * self.df4", engine=engine)

    def teardown(self, engine, threads):
        """Reset the numexpr thread count by calling the setter with no argument."""
        expr.set_numexpr_threads()
class Query:
    """Time ``DataFrame.query`` on a one-million-row, minute-frequency frame."""

    def setup(self):
        """Build the frame and the scalar values referenced by the queries."""
        N = 10**6
        halfway = (N // 2) - 1

        index = pd.date_range("20010101", periods=N, freq="min")
        s = pd.Series(index)
        # Timestamp near the middle of the range, used as the cut-off in the
        # datetime queries.
        self.ts = s.iloc[halfway]

        # "dates" duplicates the index so the same comparison can be timed
        # against both the index and a regular column.
        self.df = pd.DataFrame({"a": np.random.randn(N), "dates": index}, index=index)

        # NOTE(review): the bounds come from a separate random sample, not
        # from column "a" itself -- confirm this is intended.
        data = np.random.randn(N)
        self.min_val = data.min()
        self.max_val = data.max()

    # NOTE: the query strings refer to ``@self``, so ``query`` has to be
    # called directly from the method body where ``self`` is a local name.

    def time_query_datetime_index(self):
        """Filter rows whose index is earlier than ``self.ts``."""
        self.df.query("index < @self.ts")

    def time_query_datetime_column(self):
        """Filter rows whose ``dates`` column is earlier than ``self.ts``."""
        self.df.query("dates < @self.ts")

    def time_query_with_boolean_selection(self):
        """Filter rows with ``a`` between ``self.min_val`` and ``self.max_val``."""
        self.df.query("(a >= @self.min_val) & (a <= @self.max_val)")
from .pandas_vb_common import setup # noqa: F401 isort:skip
|