1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
|
def benchmark_hash_data():
    """
    Benchmark ``ub.hash_data`` across several hashers and input sizes.

    For every scale ``s`` in ``range(5, 13)`` a list holding ``2 ** s``
    references to a small nested item is hashed with each hasher in
    ``HASHERS``. The mean reported by ``Timerit`` is stored per hasher and
    per size, a ranking relative to the fastest hasher is printed for each
    size, and summary tables are printed through pandas at the end.

    Command line options (parsed with ``ub.argval`` / ``ub.argflag``):
        --convert=<True|False>: forwarded as ``convert`` to
            ``ub.hash_data``. Compared case-insensitively against "true";
            defaults to True.
        --show: additionally plot the timings with kwplot.

    Returns:
        None: all results are printed (and optionally plotted).

    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json is faster, or at least as fast, in most cases.
    # xxhash is also significantly faster than sha512.
    # The text is lower-cased first, so it must be compared against the
    # lower-case literal; comparing against 'True' could never match and
    # silently forced convert=False on every run.
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2 ** s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        # Timings of every hasher at this N, ordered fastest first.
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        # Express every hasher's time as a multiple of the fastest one.
        best = next(iter(ranking))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    # Columns are hashers and rows are the input sizes N.
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    # Start from an empty frame that shares df's index, then add one
    # column per hasher pair of interest.
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        # Every hasher was measured at the same sizes, so the keys of any
        # single entry give the shared x axis.
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_values(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds', title='convert = {}'.format(convert))
        kwplot.show_if_requested()
def benchmark_hash_extensions():
    """
    Time ``ub.hash_data`` on a few non-builtin object types.

    A ``ub.Path``, a random ``uuid.UUID`` and a small numpy array are hashed
    back to back inside a single timed region, so each measurement covers
    hashing all three objects once.

    CommandLine:
        xdoctest ~/code/ubelt/dev/bench/bench_hash.py benchmark_hash_extensions
    """
    import ubelt as ub
    import uuid
    import numpy as np

    # One sample object per type exercised by this benchmark.
    root_path = ub.Path('/')
    random_id = uuid.uuid4()
    small_array = np.array([1, 2, 3])
    samples = (root_path, random_id, small_array)

    import timerit
    bench = timerit.Timerit(10000, bestof=10, verbose=2)
    for stopwatch in bench.reset('time'):
        with stopwatch:
            for sample in samples:
                ub.hash_data(sample)
if __name__ == "__main__":
    # Script entry point: run the main hash_data benchmark.
    #
    # CommandLine:
    #     python ~/code/ubelt/dev/bench_hash.py
    benchmark_hash_data()
|