File: dict_node.py

package info (click to toggle)
python-apptools 5.3.1-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,552 kB
  • sloc: python: 9,868; makefile: 80
file content (233 lines) | stat: -rw-r--r-- 7,840 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# (C) Copyright 2005-2025 Enthought, Inc., Austin, TX
# All rights reserved.
#
# This software is provided without warranty under the terms of the BSD
# license included in LICENSE.txt and may be redistributed only under
# the conditions described in the aforementioned license. The license
# is also available online at http://www.enthought.com/licenses/BSD.txt
#
# Thanks for using Enthought open source!
from contextlib import closing
import json

from numpy import ndarray

from tables import Group as PyTablesGroup
from tables.nodes import filenode


#: The key name which identifies array objects in the JSON dict. A JSON object
#: containing this key is treated by `H5DictNode._object_hook` as a proxy for
#: an array stored in its own H5 node.
ARRAY_PROXY_KEY = "__array__"
#: The key under which an array proxy records the name of the H5 node that
#: holds the array data (written by `H5DictNode._array_proxy`).
NODE_KEY = "node_name"


class H5DictNode(object):
    """Dictionary-like node interface.

    Data for the dict is stored as a JSON file in a PyTables FileNode. This
    allows easy storage of Python objects, such as dictionaries and lists of
    different data types.

    Note that this is implemented using a group-node assuming that arrays are
    valid inputs and will be stored as H5 array nodes. Only numpy arrays that
    are *top-level* values of the dict are converted to array nodes; every
    other value is handed to `json.dumps` unchanged. Each stored array uses
    its dict key as the node name inside the group, and children of the group
    whose names are not keys of the dict are removed whenever the dict is
    written.

    Only `__setitem__`, `__delitem__` and assignment to `data` trigger an
    automatic flush. Mutating a value obtained from the dict in place does
    not; call `flush()` explicitly in that case.

    Parameters
    ----------
    h5_group : H5Group instance
        Group node which will be used as a dictionary store.
    auto_flush : bool
        If True, write data to disk whenever the dict data is altered.
        Otherwise, call `flush()` explicitly to write data to disk.
    """

    #: Name of filenode where dict data is stored.
    _pyobject_data_node = "_pyobject_data"

    def __init__(self, h5_group, auto_flush=True):
        # NOTE(review): `assert` is skipped under `python -O`; it is kept so
        # that callers still see an AssertionError for a non-dict node.
        assert self.is_dict_node(h5_group), (
            "h5_group is not a dict node: expected a group containing a "
            "'{}' child".format(self._pyobject_data_node)
        )

        # Unwrap to the underlying PyTables group if a wrapper was passed.
        h5_group = self._get_pyt_group(h5_group)
        self._h5_group = h5_group
        self.auto_flush = auto_flush

        # Load dict data from the file node. `_object_hook` swaps array
        # proxies for the array data stored in sibling nodes.
        dict_node = getattr(h5_group, self._pyobject_data_node)
        with closing(filenode.open_node(dict_node)) as f:
            self._pyobject_data = json.loads(
                f.read().decode("ascii"), object_hook=self._object_hook
            )

    # --------------------------------------------------------------------------
    #  Dictionary interface
    # --------------------------------------------------------------------------

    def __getitem__(self, key):
        """Return the value stored under `key` in the in-memory dict."""
        return self.data[key]

    def __setitem__(self, key, value):
        """Store `value` under `key`; flush to disk if `auto_flush` is set."""
        self.data[key] = value
        if self.auto_flush:
            self.flush()

    def __delitem__(self, key):
        """Delete `key`; flush to disk if `auto_flush` is set."""
        del self.data[key]
        if self.auto_flush:
            self.flush()

    def __contains__(self, key):
        """Return True if `key` is in the in-memory dict."""
        return key in self.data

    def keys(self):
        """Return the keys of the in-memory dict."""
        return self.data.keys()

    # --------------------------------------------------------------------------
    #  Public interface
    # --------------------------------------------------------------------------

    @property
    def data(self):
        """The in-memory dict backing this node."""
        return self._pyobject_data

    @data.setter
    def data(self, new_data_dict):
        # Replace the whole dict at once; flushed like any other mutation.
        self._pyobject_data = new_data_dict
        if self.auto_flush:
            self.flush()

    def flush(self):
        """ Write buffered data to disk.

        The existing file node is removed and a new one is written from the
        current contents of `data`.
        """
        self._remove_pyobject_node()
        self._write_pyobject_node()

    @classmethod
    def add_to_h5file(cls, h5, node_path, data=None, **kwargs):
        """Add dict node to an H5 file at the specified path.

        Parameters
        ----------
        h5 : H5File
            The H5 file where the dictionary data will be stored.
        node_path : str
            Path to node where data is stored (e.g. '/path/to/my_dict')
        data : dict
            Data for initialization, if desired.
        **kwargs
            Extra keyword arguments forwarded to the class constructor
            (e.g. `auto_flush`).

        Returns
        -------
        An instance of this class wrapping the newly created group.
        """
        h5.create_group(node_path)
        group = h5[node_path]

        cls._create_pyobject_node(h5._h5, node_path, data=data)
        return cls(group, **kwargs)

    @classmethod
    def is_dict_node(cls, pytables_node):
        """Return True if PyTables node looks like an H5DictNode.

        A node qualifies when it is a PyTables group that has a child named
        `_pyobject_data_node`.

        NOTE: This returns False if the node is an `H5DictNode` instance,
        since the input node should be a normal PyTables Group node.
        """
        # Import here to prevent circular imports
        from .file import H5Group

        if isinstance(pytables_node, H5Group):
            pytables_node = cls._get_pyt_group(pytables_node)

        if not isinstance(pytables_node, PyTablesGroup):
            return False

        return cls._pyobject_data_node in pytables_node._v_children

    # --------------------------------------------------------------------------
    #  Private interface
    # --------------------------------------------------------------------------

    def _f_remove(self):
        """This is called by H5File whenever a node is removed.

        All nodes in `_h5_group` will be removed.
        """
        # Snapshot the child names first: removing a child mutates
        # `_v_children`, and iterating a mapping while it shrinks raises
        # RuntimeError. `_handle_array_values` uses the same
        # collect-then-remove approach.
        child_names = list(self._h5_group._v_children.keys())
        for name in child_names:
            if name != self._pyobject_data_node:
                self._h5_group.__getattr__(name)._f_remove()
        # Remove the dict node
        self._remove_pyobject_node()
        # Remove the group node
        self._h5_group._f_remove()

    def _object_hook(self, dct):
        """This gets passed object dictionaries by `json.load(s)` and if it
        finds `ARRAY_PROXY_KEY` in the object description it returns the
        proxied array object.

        Any other object dictionary is returned unchanged.
        """
        if ARRAY_PROXY_KEY in dct:
            node_name = dct[NODE_KEY]
            # `[:]` reads the stored node's contents rather than returning
            # the node object itself.
            return getattr(self._h5_group, node_name)[:]
        return dct

    def _remove_pyobject_node(self):
        """Remove the file node holding the JSON data from the group."""
        node = getattr(self._h5_group, self._pyobject_data_node)
        node._f_remove()

    def _write_pyobject_node(self):
        """Write the current `data` into a new file node in the group."""
        pyt_file = self._h5_group._v_file
        node_path = self._h5_group._v_pathname
        self._create_pyobject_node(pyt_file, node_path, self.data)

    @classmethod
    def _create_pyobject_node(cls, pyt_file, node_path, data=None):
        """Create the JSON file node for `data` under `node_path`.

        Parameters
        ----------
        pyt_file : PyTables file
            File in which the node is created.
        node_path : str
            Path of the group that will contain the file node.
        data : dict, optional
            Data to store. Defaults to an empty dict.
        """
        if data is None:
            data = {}

        # Stash the array values in their own h5 nodes and return a dictionary
        # which is appropriate for JSON serialization.
        out_data = cls._handle_array_values(pyt_file, node_path, data)

        # Serialize before creating the file node, so that a value JSON cannot
        # encode does not leave an empty, unparseable node behind.
        payload = json.dumps(out_data).encode("ascii")

        kwargs = dict(where=node_path, name=cls._pyobject_data_node)
        with closing(filenode.new_node(pyt_file, **kwargs)) as f:
            f.write(payload)

    @classmethod
    def _get_pyt_group(cls, group):
        """Return `group._h5_group` if `group` has that attribute, otherwise
        return `group` itself.
        """
        return getattr(group, "_h5_group", group)

    @classmethod
    def _array_proxy(cls, pyt_file, group, key, array):
        """Stores an array as a normal H5 node and returns the proxy object
        which will be serialized to JSON.

        `ARRAY_PROXY_KEY` marks the object dictionary as an array proxy so that
        `_object_hook` can recognize it. `NODE_KEY` stores the node name of the
        array so that `_object_hook` can load the array data when the dict node
        is deserialized.

        """
        # Replace any existing node of the same name with the new array.
        if key in group:
            pyt_file.remove_node(group, key)
        pyt_file.create_array(group, key, array)
        return {ARRAY_PROXY_KEY: True, NODE_KEY: key}

    @classmethod
    def _handle_array_values(cls, pyt_file, group_path, data):
        """Store array values as H5 nodes and return a JSON-ready dict.

        Parameters
        ----------
        pyt_file : PyTables file
            File containing the group.
        group_path : str
            Path of the group backing the dict node.
        data : dict
            The dict data. Only top-level numpy array values are converted.

        Returns
        -------
        dict
            A new dict with the same keys as `data`, in which each numpy array
            value is replaced by its array proxy.
        """
        group = pyt_file.get_node(group_path)

        # Convert numpy array values to H5 array nodes.
        out_data = {}
        for key, value in data.items():
            if isinstance(value, ndarray):
                value = cls._array_proxy(pyt_file, group, key, value)
            out_data[key] = value

        # Remove stored arrays which are no longer in the data dictionary.
        # Names are collected first because removing a node changes
        # `_v_children`.
        stale_names = [
            name for name in group._v_children.keys() if name not in data
        ]
        for name in stale_names:
            pyt_file.remove_node(group, name)

        return out_data