File: virtualisation.py

package info (click to toggle)
pyhst2 2020c-1
  • links: PTS, VCS
  • area: contrib
  • in suites: bullseye
  • size: 12,532 kB
  • sloc: ansic: 11,807; python: 9,663; cpp: 6,786; makefile: 147; sh: 31
file content (37 lines) | stat: -rw-r--r-- 1,195 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""A simple example of building a virtual dataset.
This makes four 'source' HDF5 files, each with a 2x100 dataset of numbers.
Then it makes a single 4x100 virtual dataset in a separate file, exposing
the second row of each of the four sources as one dataset.
"""

import h5py
import numpy as np

# Geometry of the example. Every shape below is derived from these values so
# the sample data, the source files and the virtual layout cannot drift apart.
N_FILES = 4          # number of source files (0.h5, 1.h5, ...)
ROWS_PER_FILE = 2    # rows stored in each source dataset
N_COLS = 100         # length of every row
SOURCE_SHAPE = (ROWS_PER_FILE, N_COLS)
SOURCE_ROW = 1       # row of each source that is mapped into the VDS

# Build the file names once so the writer loop and the layout loop agree.
source_files = [f"{n}.h5" for n in range(N_FILES)]

# Create some sample data: row i holds the values i+1, i+2, ..., i+N_COLS.
data = (
    np.arange(N_COLS).reshape(1, N_COLS)
    + np.arange(1, N_FILES * ROWS_PER_FILE + 1).reshape(-1, 1)
)

# Create the source files; file n receives rows
# [n * ROWS_PER_FILE, (n + 1) * ROWS_PER_FILE) of the sample data.
for n, filename in enumerate(source_files):
    first_row = n * ROWS_PER_FILE
    with h5py.File(filename, "w") as f:
        f.create_dataset(
            "data",
            shape=SOURCE_SHAPE,
            dtype="i4",
            data=data[first_row:first_row + ROWS_PER_FILE],
        )

# Assemble the virtual dataset: row n of the layout maps onto row SOURCE_ROW
# of the "data" dataset in source file n.
layout = h5py.VirtualLayout(shape=(N_FILES, N_COLS), dtype="i4")
for n, filename in enumerate(source_files):
    source = h5py.VirtualSource(filename, "data", shape=SOURCE_SHAPE)
    vsource = source[SOURCE_ROW]
    layout[n] = vsource

# Add the virtual dataset to the output file. Per the h5py documentation,
# fillvalue is the value used where there is no data behind the mapping.
with h5py.File("VDS.h5", "w", libver="latest") as f:
    f.create_virtual_dataset("vdata", layout, fillvalue=-5)

# Read the data back: the virtual dataset is indexed like any other dataset.
with h5py.File("VDS.h5", "r") as f:
    print("Virtual dataset:")
    print(f["vdata"][:, :10])