File: dump.py

package info (click to toggle)
python-bumps 1.0.3-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,200 kB
  • sloc: python: 24,517; xml: 493; ansic: 373; makefile: 211; javascript: 99; sh: 94
file content (258 lines) | stat: -rw-r--r-- 8,631 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
"""
Print a summary of the structure of an HDF5 file to the console.
"""

import sys

import numpy as np
import h5py as h5

# Typing support
from typing import Iterator, Union
from pathlib import Path


def _str(s):
    """
    Convert a value read from an HDF5 file into text where possible.

    Values exposing a ``dtype`` of kind "V" are converted with ``tobytes()``,
    stripped of trailing NUL bytes and decoded as ASCII.  Plain ``bytes``
    are decoded as UTF-8.  Anything else is returned unchanged.
    """
    if hasattr(s, "dtype") and s.dtype.kind == "V":
        raw = s.tobytes()
        return raw.rstrip(b"\x00").decode("ascii")
    if isinstance(s, bytes):
        return s.decode("utf-8")
    return s


def summarystr(group, indent=0, show_attrs=True, recursive=True):
    # type: (h5.Group, int, bool, bool) -> str
    """
    Format the HDF5 tree rooted at *group* as a single string.

    *group* is the group at which the listing starts.

    *indent* is the number of spaces placed before the first level.

    *show_attrs* is False to leave attributes out of the listing.

    *recursive* is False to list only the immediate children of *group*.

    Lines are joined by newline; there is no trailing newline.
    """
    lines = list(_tree_format(group, indent, show_attrs, recursive))
    return "\n".join(lines)


def summary(group, indent=0, show_attrs=True, recursive=True):
    # type: (h5.Group, int, bool, bool) -> None
    """
    Print the HDF5 tree rooted at *group*, one line per entry.

    *group* is the group at which the listing starts.

    *indent* is the number of spaces placed before the first level.

    *show_attrs* is False to leave attributes out of the listing.

    *recursive* is False to list only the immediate children of *group*.
    """
    lines = _tree_format(group, indent, show_attrs, recursive)
    for line in lines:
        print(line)


def _decode_text(raw):
    # type: (object) -> str
    """
    Return *raw* as text.

    Bytes are decoded as UTF-8, falling back to ISO-8859-1 (which accepts
    any byte sequence) when they are not valid UTF-8.  Other values are
    returned as-is when already a string, otherwise converted with str().
    """
    if isinstance(raw, bytes):
        try:
            return raw.decode("UTF-8")
        except UnicodeDecodeError:
            return raw.decode("ISO-8859-1")
    return raw if isinstance(raw, str) else str(raw)


def _link_target(parent, name, child):
    # type: (h5.Group, str, Union[h5.Group, h5.Dataset]) -> Union[str, None]
    """
    Return the "target" attribute of *child* if it points somewhere else.

    A child is treated as a link when it carries a "target" attribute whose
    value differs from the child's own path below *parent*.  Returns None
    when there is no such attribute or when it names the child itself.
    """
    if "target" not in child.attrs:
        return None
    target = _str(child.attrs["target"])
    # Strip the trailing slash so children of the root ("/") get the path
    # "/name" rather than "//name".
    own_path = parent.name.rstrip("/") + "/" + name
    return target if target != own_path else None


def _field_value(field):
    # type: (h5.Dataset) -> str
    """
    Return a short preview of the contents of the dataset *field*.

    String-like data (dtype kind "S", "O" or "V") shows at most five
    entries of a 1-D dataset; numeric data shows at most six values, taken
    from the start of the first row for multidimensional datasets.
    """
    ndim = len(field.shape)

    if field.dtype.kind in ("S", "O", "V"):
        if ndim == 0:
            return _limited_str(_str(field[()]))
        if ndim > 1:
            return "[[...]]"
        count = field.shape[0]
        if count == 1:
            text = _limited_str(_str(field[0]))
        else:
            shown = [_limited_str(_str(v), width=10) for v in field[:5]]
            if count > 5:
                shown.append("...")
            text = ", ".join(shown)
        return "[" + text + "]"

    if ndim == 0:
        return "%g" % field[()]

    size = np.prod(field.shape)
    if ndim == 1:
        if size == 0:
            text = ""
        elif size == 1:
            text = "%g" % field[0]
        else:
            text = " ".join("%g" % v for v in field[:6])
            if size > 6:
                text += " ..."
        return "[" + text + "]"

    if size == 0:
        text = ""
    else:
        # Read only the leading values of the first row instead of loading
        # the whole dataset; in C order these are the same values that
        # flattening the full array would put first.
        first_row = field[(0,) * (ndim - 1) + (slice(0, 6),)]
        text = " " + " ".join("%g" % v for v in first_row)
        if field.shape[-1] > 6:
            text += " ..."
    return "[[" + text + "], ...]"


def _tree_format(node, indent, show_attrs, recursive):
    # type: (h5.Group, int, bool, bool) -> Iterator[str]
    """
    Return an iterator for the lines in a formatted HDF5 tree.

    *node* is the group to format.

    *indent* is the number of spaces before the line naming *node*; its
    attributes, fields and subgroups are indented two spaces further.

    *show_attrs* is True to list attributes below each group and field.
    When False, the "units" attribute of a field is shown inline instead.

    *recursive* is True to descend into subgroups, False to show only
    their names.

    Individual lines are not terminated by newline.
    """
    # Find fields and subgroups within the group; do this ahead of time
    # so that we can show all fields before any subgroups.
    children = [(name, hasattr(child, "dtype")) for (name, child) in node.items()]
    datasets = sorted(name for (name, isdata) in children if isdata)
    groupnames = sorted(name for (name, isdata) in children if not isdata)

    # Yield group as "nodename(nxclass)"
    yield " " * indent + _group_str(node)

    # Yield group attributes as "  @attr: value"
    indent += 2
    pad = " " * indent
    if show_attrs:
        yield from _yield_attrs(node, indent)

    # Yield fields as "  field[NxM] dtype units: value"
    for fieldname in datasets:
        field = node[fieldname]

        # Short circuit links
        target = _link_target(node, fieldname, field)
        if target is not None:
            yield "".join((pad, fieldname, " -> ", target))
            continue

        # Dimensions are shown only when there is more than one element
        # along some axis.
        ndim = len(field.shape)
        if ndim > 1 or (ndim == 1 and field.shape[0] > 1):
            shape = "[" + "x".join(str(dim) for dim in field.shape) + "]"
        else:
            shape = ""

        value = _field_value(field)
        dtype = " " + str(field.dtype)

        # Units are shown inline only when the attribute list is hidden.
        # Decode before concatenating: the attribute may be stored as bytes.
        if show_attrs:
            units = ""
        else:
            units = " " + _decode_text(field.attrs.get("units", ""))

        # Yield field: value
        yield "".join((pad, fieldname, shape, dtype, units, ": ", value))

        # Yield attributes
        if show_attrs:
            yield from _yield_attrs(field, indent + 2)

    # Yield groups.
    # If recursive, show group details, otherwise just show name.
    for groupname in groupnames:
        group = node[groupname]

        # Short circuit links
        target = _link_target(node, groupname, group)
        if target is not None:
            yield "".join((pad, groupname, " -> ", target))
            continue

        if recursive:
            yield from _tree_format(group, indent, show_attrs, recursive)
        else:
            yield pad + _group_str(group)


def _yield_attrs(node, indent):
    # type: (Union[h5.Group, h5.Dataset], int) -> Iterator[str]
    """
    Yield one "@name: value" line per attribute of *node*, sorted by name.

    The attributes NX_class, target, binary, byteorder, dtype, format and
    shape are not listed.  String values are shortened with _limited_str;
    other values are shown using str().  Each line is prefixed by *indent*
    spaces.
    """
    hidden = frozenset(("NX_class", "target", "binary", "byteorder", "dtype", "format", "shape"))
    prefix = " " * indent + "@"
    for key in sorted(node.attrs):
        if key in hidden:
            continue
        raw = _str(node.attrs[key])
        if isinstance(raw, str):
            text = _limited_str(raw)
        else:
            text = str(raw)
        yield "".join((prefix, key, ": ", text))


def _group_str(node):
    # type: (h5.Group) -> str
    """
    Return the label for a group: "root" for the node named "/", otherwise
    the last component of the node name followed by "(NX_class)" when the
    node has an NX_class attribute.
    """
    if node.name == "/":
        return "root"
    label = node.name.rpartition("/")[2]
    if "NX_class" in node.attrs:
        label += "(" + _str(node.attrs["NX_class"]) + ")"
    return label


def _limited_str(s, width=60):
    # type: (str, int) -> str
    """
    Return a one-line preview of the string *s*.

    Only the first line of *s* is kept.  If that line is longer than
    *width* characters it is cut at *width* and "..." is appended; a line
    of exactly *width* characters is returned whole.  When *s* had further
    lines, a literal backslash-n followed by "..." is appended to show
    that they were dropped.

    If the result parses as a float it is wrapped in double quotes so that
    a string such as "1.5" can be told apart from a numeric value.

    Leading and trailing whitespace is kept as-is.
    """
    first, newline, _rest = s.partition("\n")
    ret = first if len(first) <= width else first[:width] + "..."
    if newline:
        ret += "\\n..."
    # If it is a string that looks like a float then wrap it in quotes.
    try:
        float(ret)
    except ValueError:
        pass
    else:
        ret = f'"{ret}"'
    return ret


def summarize_file(filename: Union[Path, str], show_attrs: bool = True, indent: int = 0):
    """
    Print a "=== filename ===" header followed by the full tree of the file.

    *filename* is the path of the HDF5 file, which is opened read-only.

    *show_attrs* is False to leave attributes out of the listing.

    *indent* is the number of spaces placed before the first level.
    """
    print(f"=== {filename} ===")
    with h5.File(filename, "r") as hdf:
        summary(hdf, indent=indent, show_attrs=show_attrs, recursive=True)


def main():
    """
    Command line interface to file dump.

    Each command line argument is taken as the path of a file to summarize.
    With no arguments, print a usage message and exit with status 1.
    """
    # TODO: use option parser
    if len(sys.argv) <= 1:
        # Must be an f-string so that the program name is substituted.
        print(f"Usage: {sys.argv[0]} file...")
        sys.exit(1)
    for filename in sys.argv[1:]:
        summarize_file(filename)


# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()