File: test_sparse_reader.py

package info (click to toggle)
orange3 3.40.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,908 kB
  • sloc: python: 162,745; ansic: 622; makefile: 322; sh: 93; cpp: 77
file content (92 lines) | stat: -rw-r--r-- 2,994 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring

import os
import tempfile
import unittest

import numpy as np

from Orange.data import _io


# Fixture for test_read_simple: comma-separated "name" / "name=value" entries.
# Per the expectations asserted in that test, every entry is an attribute,
# an entry without "=value" gets the value 1, text after "#" is not read as
# data, and the blank line and the comment-only line produce no row (three
# rows are expected in total).
simple_file = """\
abc, def, g=1, h ,  ij k  =5,   t # ignore this, foo=42

def  , g   , h,ij,kl=4,m,,,
# nothing here
\t\t\tdef
"""

# Fixture for test_read_complex: same layout as above, plus "|" and ";".
# Per the expectations asserted in that test, entries before "|" are
# attributes, entries between "|" and ";" are class values, and entries after
# ";" are metas.
complex_file = """\
abc, g=1, h ,  ij | k  =5,   t # ignore this, foo=42

, g   , h,ij|,kl=4, k ;m,,,
# nothing here
\t\t\t;def
"""

class TestTabReader(unittest.TestCase):
    """Tests for ``_io.sparse_read_float`` on small hand-written input files.

    Each test writes one of the module-level fixture strings to a temporary
    file, parses it, and compares the returned sparse parts (``data``,
    ``indices``, ``indptr``) and the name-to-column dictionaries against
    hard-coded expectations.

    The class name is kept unchanged so that existing test ids stay valid.
    """

    def _write_temp_file(self, contents):
        """Write *contents* to a new temporary file and return its path.

        Args:
            contents: text to store; it is encoded as ASCII before writing.

        Returns:
            The file path as ASCII-encoded ``bytes``, which is the form the
            tests pass to ``_io.sparse_read_float``.

        The file is closed when the ``with`` block exits, even if writing
        fails, and its removal is registered with ``addCleanup`` before
        anything is written, so the file is deleted whether the test passes,
        fails, or errors out part-way through.
        """
        with tempfile.NamedTemporaryFile(delete=False) as f:
            # Register removal first so a failing write cannot leak the file.
            self.addCleanup(os.remove, f.name)
            f.write(contents.encode("ascii"))
        return f.name.encode("ascii")

    def test_read_simple(self):
        """Parse a file that contains attributes only."""
        filename = self._write_temp_file(simple_file)

        X, Y, metas, attr_indices, class_indices, meta_indices = \
            _io.sparse_read_float(filename)

        # Attribute names are numbered in order of first appearance.
        self.assertEqual(
            attr_indices,
            {b"abc": 0, b"def": 1, b"g": 2, b"h": 3, b"ij k": 4, b"t": 5,
             b"ij": 6, b"kl": 7, b"m": 8})
        # One group of values/columns per data row; indptr marks row bounds.
        np.testing.assert_almost_equal(X.data, [1, 1, 1, 1, 5, 1,
                                                1, 1, 1, 1, 4, 1,
                                                1])
        np.testing.assert_equal(X.indices, [0, 1, 2, 3, 4, 5,
                                            1, 2, 3, 6, 7, 8,
                                            1])
        np.testing.assert_equal(X.indptr, [0, 6, 12, 13])

        # The fixture has no class or meta section.
        self.assertEqual(class_indices, {})
        self.assertIsNone(Y)

        self.assertEqual(meta_indices, {})
        self.assertIsNone(metas)

    # for readability, pylint: disable=bad-whitespace
    def test_read_complex(self):
        """Parse a file that contains attributes, class values and metas."""
        filename = self._write_temp_file(complex_file)

        X, Y, metas, attr_indices, class_indices, meta_indices = \
            _io.sparse_read_float(filename)

        self.assertEqual(
            attr_indices,
            {b"abc": 0, b"g": 1, b"h": 2, b"ij": 3})
        np.testing.assert_equal(X.data,    [1, 1, 1, 1, 1, 1, 1])
        np.testing.assert_equal(X.indices, [0, 1, 2, 3, 1, 2, 3])
        # The third row has no attributes, hence the repeated final offset.
        np.testing.assert_equal(X.indptr,  [0,          4,       7, 7])

        self.assertEqual(class_indices, {b"k": 0, b"t": 1, b"kl": 2})
        np.testing.assert_equal(Y.data,    [5, 1, 1, 4])
        np.testing.assert_equal(Y.indices, [0, 1, 0, 2])
        np.testing.assert_equal(Y.indptr,  [0,    2,   4, 4])

        self.assertEqual(meta_indices, {b"m": 0, b"def": 1})
        # The first row has no metas, hence the repeated leading offset.
        np.testing.assert_equal(metas.data,    [   1, 1])
        np.testing.assert_equal(metas.indices, [   0, 1])
        np.testing.assert_equal(metas.indptr,  [0, 0, 1, 2])

    # TODO checks for quotes, escapes, error checking

# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()