File: make_regression_tdata.py

package info (click to toggle)
bitshuffle 0.5.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,048 kB
  • sloc: ansic: 5,022; python: 1,156; makefile: 49; sh: 14
file content (69 lines) | stat: -rw-r--r-- 1,894 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
Generate the HDF5 regression-test data file for the current bitshuffle
version (written under tests/data/).
"""

import numpy as np
from numpy import random
import h5py

import bitshuffle
from bitshuffle import h5
from h5py import h5z

BLOCK_SIZE = 64  # Smallish such that datasets have many blocks but are small.
COMP_LVL = 10  # ZSTD compression level
# Single-stage HDF5 filter pipeline: just the bitshuffle filter.
FILTER_PIPELINE = [h5.H5FILTER]
# Per-filter option tuples: (block size, internal compressor[, level]).
# Index 0 selects LZ4, index 1 selects ZSTD at COMP_LVL.
FILTER_OPTS = [
    [(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)],
    [(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)],
]

# Output path embeds the bitshuffle version so data files from several
# releases can coexist under tests/data/.
OUT_FILE = "tests/data/regression_%s.h5" % bitshuffle.__version__

# NumPy dtype codes of assorted item sizes ("aN" == N-byte bytes dtype),
# covering both power-of-two and odd element widths.
DTYPES = ["a1", "a2", "a3", "a4", "a6", "a8", "a10"]

# Build everything inside a context manager so the file is closed (and
# flushed) even if a dataset creation raises part-way through.
with h5py.File(OUT_FILE, "w") as f:
    g_orig = f.create_group("original")
    g_comp_lz4 = f.create_group("compressed")
    g_comp_zstd = f.create_group("compressed_zstd")

    for dtype_code in DTYPES:
        # Convert once, keeping the original code string for naming.  The
        # previous code rebound the loop variable to np.dtype(...) inside the
        # rep loop, so reps "b"/"c" were named after str(np.dtype(...))
        # (e.g. "|S1_b") instead of the dtype code (e.g. "a1_b").
        dtype = np.dtype(dtype_code)
        for rep in ["a", "b", "c"]:
            dset_name = "%s_%s" % (dtype_code, rep)
            # A few whole blocks plus a random partial block, so the filter's
            # final short-block path is exercised.
            n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE)
            shape = (n_elem,)
            chunks = shape  # One chunk per dataset.
            # NOTE(review): np.random.randint's upper bound is exclusive, so
            # byte value 255 never occurs; harmless for random test data.
            data = random.randint(0, 255, n_elem * dtype.itemsize)
            data = data.astype(np.uint8).view(dtype)

            # Uncompressed reference copy.
            g_orig.create_dataset(dset_name, data=data)

            # Bitshuffle + LZ4 compressed copy.
            h5.create_dataset(
                g_comp_lz4,
                bytes(dset_name, "utf-8"),
                shape,
                dtype,
                chunks=chunks,
                filter_pipeline=FILTER_PIPELINE,
                filter_flags=(h5z.FLAG_MANDATORY,),
                filter_opts=FILTER_OPTS[0],
            )
            g_comp_lz4[dset_name][:] = data

            # Bitshuffle + ZSTD compressed copy.
            h5.create_dataset(
                g_comp_zstd,
                bytes(dset_name, "utf-8"),
                shape,
                dtype,
                chunks=chunks,
                filter_pipeline=FILTER_PIPELINE,
                filter_flags=(h5z.FLAG_MANDATORY,),
                filter_opts=FILTER_OPTS[1],
            )
            g_comp_zstd[dset_name][:] = data