File: test_binary_dm.py

package info (click to toggle)
python-skbio 0.5.8-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 13,224 kB
  • sloc: python: 47,839; ansic: 672; makefile: 210; javascript: 50; sh: 19
file content (125 lines) | stat: -rw-r--r-- 4,775 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------

import unittest
import tempfile
import shutil
import os

import numpy as np
import numpy.testing as npt
import h5py

from skbio import DistanceMatrix
from skbio.io.format.binary_dm import (_h5py_mat_to_skbio_mat,
                                       _skbio_mat_to_h5py_mat, _get_header,
                                       _parse_ids, _verify_dimensions,
                                       _bytes_decoder, _passthrough_decoder,
                                       _set_header,
                                       _vlen_dtype,
                                       _binary_dm_sniffer)


class BinaryMatrixTests(unittest.TestCase):
    def setUp(self):
        self.mat = np.array([[0, 0.1, 0.2],
                             [0.1, 0, 0.3],
                             [0.2, 0.3, 0]])
        self.ids = ['a', 'b', 'c']

        self.tempdir = tempfile.TemporaryDirectory()

        self.basic_fname = os.path.join(self.tempdir.name, 'basic')
        self.basic = h5py.File(self.basic_fname, 'a')
        ids = self.basic.create_dataset('order', shape=(3, ),
                                        dtype=_vlen_dtype)
        ids[:] = self.ids
        self.basic.create_dataset('matrix', data=self.mat)
        _set_header(self.basic)
        self.basic.close()

        self.badids_fname = os.path.join(self.tempdir.name, 'badids')
        self.badids = h5py.File(self.badids_fname, 'a')
        ids = self.badids.create_dataset('order', shape=(2, ),
                                         dtype=_vlen_dtype)
        ids[:] = ['a', 'b']
        self.badids.create_dataset('matrix', data=self.mat)
        _set_header(self.badids)
        self.badids.close()

        self.noheader_fname = os.path.join(self.tempdir.name, 'noheader')
        self.noheader = h5py.File(self.noheader_fname, 'a')
        ids = self.noheader.create_dataset('order', shape=(3, ),
                                           dtype=_vlen_dtype)
        ids[:] = self.ids
        self.noheader.create_dataset('matrix', data=self.mat)

    def tearDown(self):
        shutil.rmtree(self.tempdir.name)

    def test_binary_dm_sniffer(self):
        self.assertEqual((True, {}),
                         _binary_dm_sniffer(open(self.basic_fname, 'rb')))
        self.assertEqual((False, {}),
                         _binary_dm_sniffer(open(self.badids_fname, 'rb')))
        self.assertEqual((False, {}),
                         _binary_dm_sniffer(open(self.noheader_fname, 'rb')))

    def test_h5py_mat_to_skbio_mat(self):
        exp = DistanceMatrix(self.mat, self.ids)
        obs = _h5py_mat_to_skbio_mat(DistanceMatrix,
                                     h5py.File(self.basic_fname, 'r'))
        self.assertEqual(obs, exp)

    def test_skbio_mat_to_h5py_mat(self):
        fh1 = h5py.File('f1', 'a', driver='core', backing_store=False)

        mat = DistanceMatrix(self.mat, self.ids)
        _skbio_mat_to_h5py_mat(mat, fh1)
        npt.assert_equal(np.asarray(fh1['order'][:], dtype=str), mat.ids)
        npt.assert_equal(fh1['matrix'], mat.data)

    def test_get_header(self):
        self.assertEqual(_get_header(h5py.File(self.basic_fname, 'r')),
                         {'format': b'BDSM', 'version': b'2020.06'})
        self.assertEqual(_get_header(h5py.File(self.noheader_fname, 'r')),
                         None)

    def test_parse_ids(self):
        tests = [(['a', 'b', 'c'], ['a', 'b', 'c']),
                 ([b'a', b'b', b'\xc3\xa9\xc3\xb8asd'],
                  ['a', 'b', 'éøasd'])]

        for test, exp in tests:
            self.assertEqual(_parse_ids(test), exp)

    def test_verify_dimensions(self):
        self.assertTrue(_verify_dimensions(h5py.File(self.basic_fname, 'r')))
        self.assertFalse(_verify_dimensions(h5py.File(self.badids_fname, 'r')))

    def test_bytes_decoder(self):
        test = [b'', b'a', b'\xc3\xa9\xc3\xb8asd']
        exp = ['', 'a', 'éøasd']
        self.assertEqual(_bytes_decoder(test), exp)

    def test_passthrough_decoder(self):
        tests = [('', ''), ('a', 'a'), ('éøasd', 'éøasd')]
        for test, expected in tests:
            self.assertEqual(_passthrough_decoder(test), expected)

    def test_set_header(self):
        def mock():
            obj = h5py.File('bung', 'a', driver='core', backing_store=False)
            return obj

        m = mock()
        _set_header(m)


# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()