1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
import bz2
import gzip
import random
from time import perf_counter
from bitarray.util import (
ones, random_p,
serialize, deserialize,
sc_encode, sc_decode,
vl_encode, vl_decode,
)
from sc_stat import sc_stat
def p_range(n=1 << 28, p_min=1e-8):
    """Print ``sc_encode`` statistics for progressively sparser bitarrays.

    Starts from an all-ones bitarray of ``n`` bits (labelled p = 1.0) and,
    after each measurement, ANDs it with a fresh ``random_p(n)`` bitarray
    while halving the label ``p``.  (The halving assumes ``random_p`` uses
    a default probability of 1/2.)  For every step one row is printed:
    ``p``, the size of the encoded bytes relative to ``n / 8`` raw bytes,
    and the five values of ``sc_stat(encoded)['blocks']`` (labelled raw and
    type 1-4 in the header).  Each encoding is also decoded again and
    compared with the input via ``assert`` (skipped under ``python -O``).

    Args:
        n: Number of bits in the test bitarray.  Defaults to ``1 << 28``.
        p_min: The sweep stops as soon as ``p`` is no longer greater than
            this value.  Defaults to ``1e-8``.

    Raises:
        ValueError: If ``n`` or ``p_min`` is not positive.
    """
    # Validate first: n == 0 would divide by zero in the ratio below and a
    # negative p_min would never let the loop terminate.
    if n <= 0:
        raise ValueError("n must be positive, got %r" % (n,))
    if p_min <= 0:
        raise ValueError("p_min must be positive, got %r" % (p_min,))

    raw_bytes = n / 8  # loop-invariant denominator of the ratio column
    p = 1.0
    a = ones(n)
    print(" p ratio raw"
          " type 1 type 2 type 3 type 4")
    print(" " + 73 * '-')
    while p > p_min:
        b = sc_encode(a)
        blocks = sc_stat(b)['blocks']
        print(' %11.8f %11.8f %8d %8d %8d %8d %8d' %
              (p, len(b) / raw_bytes, *blocks))
        assert a == sc_decode(b)
        # Thin out the set bits for the next, sparser measurement.
        a &= random_p(n)
        p /= 2
def compare(n=1 << 26, p=1.0 / 1024):
    """Time several codecs on one random bitarray and print a table.

    A bitarray ``a = random_p(n, p)`` is generated once.  The bitarray
    codecs (``serialize``, ``vl_encode``, ``sc_encode``) are fed ``a``
    itself, while ``gzip`` and ``bz2`` are fed ``a.tobytes()``.  For each
    codec one row is printed: its name, the compression and decompression
    wall-clock time in milliseconds, and the encoded size divided by the
    size of the raw bytes.  After printing, the decoded result is compared
    with the codec's input via ``assert`` (skipped under ``python -O``).

    Args:
        n: Number of bits in the test bitarray.  Defaults to ``1 << 26``.
        p: Probability passed to ``random_p`` for each bit.  Defaults to
            ``1 / 1024``.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    # An empty bitarray would make the ratio column divide by zero.
    if n <= 0:
        raise ValueError("n must be positive, got %r" % (n,))

    a = random_p(n, p)
    raw = a.tobytes()
    raw_size = len(raw)

    # (label, encoder, decoder, payload) - each codec carries the input it
    # operates on, so no name-based lookup is needed inside the loop.
    codecs = [
        ('serialize', serialize, deserialize, a),
        ('vl', vl_encode, vl_decode, a),
        ('sc', sc_encode, sc_decode, a),
        ('gzip', gzip.compress, gzip.decompress, raw),
        ('bz2', bz2.compress, bz2.decompress, raw),
    ]

    print(20 * ' ' + "compress (ms) decompress (ms) ratio")
    print(70 * '-')
    for name, encode, decode, payload in codecs:
        t0 = perf_counter()
        encoded = encode(payload)  # compression
        t1 = perf_counter()
        decoded = decode(encoded)  # decompression
        t2 = perf_counter()
        print(" %-11s %16.3f %16.3f %16.4f" %
              (name, 1000 * (t1 - t0), 1000 * (t2 - t1),
               len(encoded) / raw_size))
        assert decoded == payload
if __name__ == "__main__":
    # Seed the stdlib RNG up front so repeated runs draw the same random
    # bitarrays (bitarray documents random_p as reproducible under
    # random.seed()), then run the two reports in order.
    random.seed(123)
    for report in (compare, p_range):
        report()
|