File: data_factory.py

package info (click to toggle)
glueviz 0.9.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 17,180 kB
  • ctags: 6,728
  • sloc: python: 37,111; makefile: 134; sh: 60
file content (111 lines) | stat: -rw-r--r-- 3,201 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
Load files created by the astrodendro package.

astrodendro must be installed in order to use this loader.
"""

from __future__ import absolute_import, division, print_function

import numpy as np
from astrodendro import Dendrogram

from glue.core.data_factories.hdf5 import is_hdf5
from glue.core.data_factories.fits import is_fits
from glue.core.data import Data
from glue.config import data_factory


__all__ = ['load_dendro', 'is_dendro']


def is_dendro(file, **kwargs):
    """
    Check whether a file looks like a dendrogram saved by astrodendro

    HDF5 files are recognized by the presence of the ``data``, ``index_map``
    and ``newick`` entries. FITS files are recognized either by HDUs named
    ``DATA``, ``INDEX_MAP`` and ``NEWICK``, or, for files written by older
    versions of astrodendro, by a heuristic on the number and shape of the
    (unnamed) HDUs.

    :param file: Path to the file to inspect
    :param kwargs: Unused
    :returns: `True` if the file passes the checks above, `False` otherwise
              (including when the file is neither HDF5 nor FITS)
    """

    if is_hdf5(file):

        import h5py

        # Use a context manager so the file handle is released once the
        # check is done, rather than being left open
        with h5py.File(file, 'r') as f:
            return 'data' in f and 'index_map' in f and 'newick' in f

    elif is_fits(file):

        from astropy.io import fits

        # The HDU list is closed on every return path below; all accesses to
        # the HDU data happen inside this block and only booleans leave it
        with fits.open(file, ignore_missing_end=True) as hdulist:

            # In recent versions of Astropy, we could do 'DATA' in hdulist
            # etc. but this doesn't work with Astropy 0.3, so we use the
            # following method instead:
            try:
                hdulist['DATA']
                hdulist['INDEX_MAP']
                hdulist['NEWICK']
            except KeyError:
                pass  # continue
            else:
                return True

            # For older versions of astrodendro, the HDUs did not have names

            # Here we use heuristics to figure out if this is likely to be a
            # dendrogram. Specifically, there should be three HDU extensions.
            # The primary HDU should be empty, HDU 1 and HDU 2 should have
            # matching shapes, and HDU 3 should have a 1D array. Also, if the
            # HDUs do have names then this is not a dendrogram since the old
            # files did not have names

            # This branch can be removed once we think most dendrogram files
            # will have HDU names.

            if len(hdulist) != 4:
                return False

            if hdulist[1].name != '' or hdulist[2].name != '' or hdulist[3].name != '':
                return False

            if hdulist[0].data is not None:
                return False

            if hdulist[1].data is None or hdulist[2].data is None or hdulist[3].data is None:
                return False

            if hdulist[1].data.shape != hdulist[2].data.shape:
                return False

            if hdulist[3].data.ndim != 1:
                return False

            # We're probably ok, so return True
            return True

    else:

        return False


@data_factory(label='Dendrogram', identifier=is_dendro, priority=1000)
def load_dendro(file):
    """
    Read a dendrogram file written by the astrodendro package

    :param file: Path to a dendrogram file
    :returns: A list of 2 glue Data objects: the dendrogram (with ``parent``,
              ``height`` and ``peak`` components), followed by the original
              dataset (with ``intensity`` and ``structure`` components).
    """

    dendrogram = Dendrogram.load_from(file)

    # Look up every structure once, in index order
    structures = [dendrogram[idx] for idx in np.arange(len(dendrogram))]

    parents = []
    heights = []
    peaks = []
    for structure in structures:
        ancestor = structure.parent
        # Structures without a parent are flagged with -1
        parents.append(-1 if ancestor is None else ancestor.idx)
        heights.append(structure.height)
        # get_peak returns a sequence; only its second item is kept
        peaks.append(structure.get_peak(True)[1])

    tree = Data(parent=np.array(parents),
                height=np.array(heights),
                peak=np.array(peaks),
                label='Dendrogram')

    image = Data(intensity=dendrogram.data,
                 structure=dendrogram.index_map)

    # Tie the image's 'structure' component to the first pixel component
    # of the dendrogram dataset
    image.join_on_key(tree, 'structure', tree.pixel_component_ids[0])

    return [tree, image]