1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022
# Python Software Foundation; All Rights Reserved
# This file is part of python-isal which is distributed under the
# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2.
# This file does not include original code from CPython. It is used to ensure
# that compression and decompression between CPython's zlib and isal_zlib
# is compatible.
import gzip
import itertools
import zlib
from pathlib import Path
from isal import igzip, isal_zlib
import pytest
DATA_DIR = Path(__file__).parent / "data"
COMPRESSED_FILE = DATA_DIR / "test.fastq.gz"
with gzip.open(str(COMPRESSED_FILE), mode="rb") as file_h:
DATA = file_h.read()
DATA_SIZES = [2**i for i in range(3, 20)]
# 100 seeds generated with random.randint(0, 2**32-1)
SEEDS_FILE = DATA_DIR / "seeds.txt"
INT_OVERFLOW = 211928379812738912738917238971289378912379823871932719823798123
# Get some negative ints and some really big ints into the mix.
SEEDS = [-INT_OVERFLOW, -3, -1, 0, 1, INT_OVERFLOW] + [
int(seed) for seed in SEEDS_FILE.read_text().splitlines()]
# Wbits for ZLIB compression, GZIP compression, and RAW compressed streams
WBITS_RANGE = list(range(9, 16)) + list(range(25, 32)) + list(range(-15, -8))
@pytest.mark.parametrize(["data_size", "value"],
itertools.product(DATA_SIZES, SEEDS))
def test_crc32(data_size, value):
data = DATA[:data_size]
assert zlib.crc32(data, value) == isal_zlib.crc32(data, value)
@pytest.mark.parametrize(["data_size", "value"],
itertools.product(DATA_SIZES, SEEDS))
def test_adler32(data_size, value):
data = DATA[:data_size]
assert zlib.adler32(data, value) == isal_zlib.adler32(data, value)
@pytest.mark.parametrize(["data_size", "level"],
itertools.product(DATA_SIZES, range(4)))
def test_compress(data_size, level):
data = DATA[:data_size]
compressed = isal_zlib.compress(data, level=level)
assert zlib.decompress(compressed) == data
@pytest.mark.parametrize(["data_size", "level"],
itertools.product(DATA_SIZES, range(10)))
def test_decompress_zlib(data_size, level):
data = DATA[:data_size]
compressed = zlib.compress(data, level=level)
decompressed = isal_zlib.decompress(compressed)
assert decompressed == data
@pytest.mark.parametrize(["data_size", "level", "wbits", "memLevel"],
itertools.product([128 * 1024], range(4),
WBITS_RANGE, range(1, 10)))
def test_decompress_wbits(data_size, level, wbits, memLevel):
data = DATA[:data_size]
compressobj = zlib.compressobj(level=level, wbits=wbits, memLevel=memLevel)
compressed = compressobj.compress(data) + compressobj.flush()
decompressed = isal_zlib.decompress(compressed, wbits=wbits)
assert data == decompressed
@pytest.mark.parametrize(["data_size", "level"],
itertools.product(DATA_SIZES, range(4)))
def test_decompress_isal_zlib(data_size, level):
data = DATA[:data_size]
compressed = isal_zlib.compress(data, level=level)
decompressed = isal_zlib.decompress(compressed)
print(len(decompressed))
assert decompressed == data
@pytest.mark.parametrize(["data_size", "level", "wbits", "memLevel"],
itertools.product([128 * 1024], range(4),
WBITS_RANGE, range(1, 10)))
def test_compress_compressobj(data_size, level, wbits, memLevel):
data = DATA[:data_size]
compressobj = isal_zlib.compressobj(level=level,
wbits=wbits,
memLevel=memLevel)
compressed = compressobj.compress(data) + compressobj.flush()
decompressed = zlib.decompress(compressed, wbits=wbits)
assert data == decompressed
@pytest.mark.parametrize(["data_size", "level", "wbits", "memLevel"],
itertools.product([128 * 1024], range(4),
WBITS_RANGE, range(1, 10)))
def test_decompress_decompressobj(data_size, level, wbits, memLevel):
data = DATA[:data_size]
compressobj = zlib.compressobj(level=level, wbits=wbits, memLevel=memLevel)
compressed = compressobj.compress(data) + compressobj.flush()
decompressobj = isal_zlib.decompressobj(wbits=wbits)
decompressed = decompressobj.decompress(compressed) + decompressobj.flush()
assert data == decompressed
assert decompressobj.unused_data == b""
assert decompressobj.unconsumed_tail == b""
def test_decompressobj_unconsumed_tail():
data = DATA[:128*1024]
compressed = zlib.compress(data)
decompressobj = isal_zlib.decompressobj()
output = decompressobj.decompress(compressed, 2048)
assert len(output) == 2048
@pytest.mark.parametrize(["data_size", "level"],
itertools.product(DATA_SIZES, range(4)))
def test_igzip_compress(data_size, level):
data = DATA[:data_size]
compressed = igzip.compress(data, compresslevel=level)
assert gzip.decompress(compressed) == data
@pytest.mark.parametrize(["data_size", "level"],
itertools.product(DATA_SIZES, range(10)))
def test_decompress_gzip(data_size, level):
data = DATA[:data_size]
compressed = gzip.compress(data, compresslevel=level)
decompressed = igzip.decompress(compressed)
assert decompressed == data
@pytest.mark.parametrize(["data_size", "level"],
itertools.product(DATA_SIZES, range(4)))
def test_decompress_igzip(data_size, level):
data = DATA[:data_size]
compressed = igzip.compress(data, compresslevel=level)
decompressed = igzip.decompress(compressed)
print(len(decompressed))
assert decompressed == data
@pytest.mark.parametrize(["unused_size", "wbits"],
itertools.product([26], [-15, 15, 31]))
def test_unused_data(unused_size, wbits):
unused_data = b"abcdefghijklmnopqrstuvwxyz"[:unused_size]
compressor = zlib.compressobj(wbits=wbits)
data = b"A meaningful sentence starts with a capital and ends with a."
compressed = compressor.compress(data) + compressor.flush()
decompressor = isal_zlib.decompressobj(wbits=wbits)
result = decompressor.decompress(compressed + unused_data)
assert result == data
assert decompressor.unused_data == unused_data
def test_zlib_dictionary_decompress():
dictionary = b"bla"
data = b"bladiebla"
compobj = zlib.compressobj(zdict=dictionary)
compressed = compobj.compress(data) + compobj.flush()
decompobj = isal_zlib.decompressobj(zdict=dictionary)
assert decompobj.decompress(compressed) == data
def test_isal_zlib_dictionary_decompress():
dictionary = b"bla"
data = b"bladiebla"
compobj = isal_zlib.compressobj(zdict=dictionary)
compressed = compobj.compress(data) + compobj.flush()
decompobj = zlib.decompressobj(zdict=dictionary)
assert decompobj.decompress(compressed) == data
|