1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
|
"""
Benchmarks in this file depend exclusively on code in _libs/
If a PR does not edit anything in _libs, it is very unlikely that benchmarks
in this file will be affected.
"""
import numpy as np
from pandas._libs import index as libindex
def _get_numeric_engines():
    """
    Collect the numeric index engines that ``libindex`` exposes.

    Returns
    -------
    list of tuple
        ``(engine_class, numpy_dtype)`` pairs, in the order listed below.
        A name that is not an attribute of ``libindex`` is skipped, so the
        result only contains engines that can actually be looked up.
    """
    engine_names = [
        ("Int64Engine", np.int64),
        ("Int32Engine", np.int32),
        ("Int16Engine", np.int16),
        ("Int8Engine", np.int8),
        ("UInt64Engine", np.uint64),
        ("UInt32Engine", np.uint32),
        # The name must match the attribute exactly: a misspelling is
        # silently dropped by the ``hasattr`` filter below.
        ("UInt16Engine", np.uint16),
        ("UInt8Engine", np.uint8),
        ("Float64Engine", np.float64),
        ("Float32Engine", np.float32),
    ]
    return [
        (getattr(libindex, engine_name), dtype)
        for engine_name, dtype in engine_names
        if hasattr(libindex, engine_name)
    ]
class NumericEngineIndexing:
    """
    Benchmark ``get_loc`` on the numeric index engines from ``libindex``.

    Parametrized over the (engine, dtype) pair, the ordering of the indexed
    values, whether distinct values are requested, and the size factor ``N``;
    the indexed array always holds ``3 * N`` elements.
    """

    params = [
        _get_numeric_engines(),
        ["monotonic_incr", "monotonic_decr", "non_monotonic"],
        [True, False],
        [10**5, 2 * 10**6],  # 2e6 is above SIZE_CUTOFF
    ]
    param_names = ["engine_and_dtype", "index_type", "unique", "N"]

    def setup(self, engine_and_dtype, index_type, unique, N):
        """
        Build the engine under test and the keys looked up while timing.

        Parameters
        ----------
        engine_and_dtype : tuple
            ``(engine_class, numpy_dtype)`` pair.
        index_type : {"monotonic_incr", "monotonic_decr", "non_monotonic"}
            Ordering of the values handed to the engine.
        unique : bool
            If True the values come from ``np.arange``; otherwise only the
            values 1, 2 and 3 are used, ``N`` times each.
        N : int
            Size factor; the array has ``3 * N`` elements.
        """
        engine, dtype = engine_and_dtype

        # NOTE(review): for the narrow integer dtypes ``3 * N`` exceeds the
        # representable range, so the "unique" arrays presumably wrap around
        # and are neither unique nor monotonic -- confirm.
        if index_type == "monotonic_incr":
            if unique:
                arr = np.arange(N * 3, dtype=dtype)
            else:
                # N ones, then N twos, then N threes.
                arr = np.array([1, 2, 3], dtype=dtype).repeat(N)
        elif index_type == "monotonic_decr":
            if unique:
                arr = np.arange(N * 3, dtype=dtype)[::-1]
            else:
                # Reversed view of the increasing layout: threes, twos, ones.
                arr = np.array([1, 2, 3], dtype=dtype).repeat(N)[::-1]
        else:
            assert index_type == "non_monotonic"
            if unique:
                # The top third of the range goes first, then the lower
                # two thirds, so the sequence is not sorted.
                arr = np.empty(N * 3, dtype=dtype)
                arr[:N] = np.arange(N * 2, N * 3, dtype=dtype)
                arr[N:] = np.arange(N * 2, dtype=dtype)
            else:
                # 1, 2, 3, 1, 2, 3, ...
                arr = np.tile(np.array([1, 2, 3], dtype=dtype), N)

        self.data = engine(arr)
        # The call below avoids populating the mapping etc. while timing.
        self.data.get_loc(2)

        self.key_middle = arr[len(arr) // 2]
        self.key_early = arr[2]

    def time_get_loc(self, engine_and_dtype, index_type, unique, N):
        """Time ``get_loc`` for the key taken from position 2 of the array."""
        self.data.get_loc(self.key_early)

    def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
        """Time ``get_loc`` for the key taken from the middle of the array."""
        # searchsorted performance may be different near the middle of a range
        # vs near an endpoint
        self.data.get_loc(self.key_middle)
class ObjectEngineIndexing:
    """
    Benchmark ``get_loc`` on ``libindex.ObjectEngine``.

    The engine indexes an object-dtype array made of the strings "a", "b"
    and "c", each occurring ``10**5`` times, in the ordering selected by
    ``index_type``.
    """

    params = [("monotonic_incr", "monotonic_decr", "non_monotonic")]
    param_names = ["index_type"]

    def setup(self, index_type):
        """
        Build the engine for the requested ordering.

        Parameters
        ----------
        index_type : {"monotonic_incr", "monotonic_decr", "non_monotonic"}
            Ordering of the values handed to the engine.

        Raises
        ------
        KeyError
            If ``index_type`` is not one of the supported orderings.
        """
        N = 10**5
        letters = np.array(["a", "b", "c"], dtype=object)

        # Only the requested layout is built; the others are never needed.
        if index_type == "monotonic_incr":
            # N "a"s, then N "b"s, then N "c"s.
            arr = letters.repeat(N)
        elif index_type == "monotonic_decr":
            # N "c"s, then N "b"s, then N "a"s.
            arr = letters[::-1].repeat(N)
        elif index_type == "non_monotonic":
            # "a", "b", "c", "a", "b", "c", ...
            arr = np.tile(letters, N)
        else:
            raise KeyError(index_type)

        self.data = libindex.ObjectEngine(arr)
        # The call below avoids populating the mapping etc. while timing.
        self.data.get_loc("b")

    def time_get_loc(self, index_type):
        """Time ``get_loc`` for the key "b"."""
        self.data.get_loc("b")
|