File: test_basket_reader.py

package info (click to toggle)
orange3 3.40.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,908 kB
  • sloc: python: 162,745; ansic: 622; makefile: 322; sh: 93; cpp: 77
file content (97 lines) | stat: -rw-r--r-- 3,388 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring

import functools
import os
import tempfile
import unittest

import numpy as np

from Orange.data.io import BasketReader


def with_file(s):
    """Decorate a test method so it receives a temporary file containing *s*.

    The decorated method is called as ``f(self, fname)``, where ``fname`` is
    the path of a freshly created temporary file holding ``s`` encoded as
    UTF-8.  The file is removed after the method returns or raises.

    Parameters
    ----------
    s : str
        Text to write into the temporary file.

    Returns
    -------
    callable
        A decorator for methods with the signature ``(self, fname)``.
    """
    def fle_decorator(f, s=s):
        @functools.wraps(f)
        def decorated(self, s=s):
            # delete=False: the file has to outlive its handle so that the
            # wrapped method can reopen it by name.
            fle = tempfile.NamedTemporaryFile(delete=False)
            fname = fle.name
            try:
                # Writing happens inside the try block so that a failing
                # encode/write still closes the handle and removes the file.
                with fle:
                    fle.write(s.encode("utf-8"))
                return f(self, fname)
            finally:
                os.remove(fname)
        return decorated
    return fle_decorator


def read_basket(filename):
    """Construct a ``BasketReader`` for *filename* and return its ``read()`` result."""
    reader = BasketReader(filename)
    return reader.read()


class TestBasketReader(unittest.TestCase):
    """Tests for reading basket-format files through ``BasketReader``.

    Most tests use ``with_file`` to write the sample text into a temporary
    file, whose path is passed to the test method as ``fname``.
    """

    @with_file("""a=1,b=2,c=3""")
    def test_read_variable_is_value_syntax(self, fname):
        # ``name=value`` items: expect one variable per name, holding the
        # given value.
        table = read_basket(fname)
        self.assertEqual(len(table.domain.variables), 3)
        self.assertEqual(["a", "b", "c"],
                         [var.name for var in table.domain.variables])
        np.testing.assert_almost_equal(table.X.todense(),
                                       np.array([[1, 2, 3]]))

    @with_file("""a,b,c,d,e""")
    def test_read_variable_only_syntax(self, fname):
        # Bare names without ``=value`` are expected to be read as 1.
        table = read_basket(fname)
        self.assertEqual(len(table.domain.variables), 5)
        np.testing.assert_almost_equal(table.X.todense(),
                                       np.array([[1, 1, 1, 1, 1]]))

    @with_file("""a=1, b=2, c=3""")
    def test_handles_spaces_between_variables(self, fname):
        table = read_basket(fname)
        self.assertEqual(len(table.domain.variables), 3)
        self.assertEqual(set(table[0]), {1, 2, 3})

    # This test used to share its name with the test below, so the later
    # definition replaced it in the class namespace and it never ran.
    @with_file("""a=1, b=2\na=1, b=4""")
    def test_reads_values_from_multiple_rows(self, fname):
        table = read_basket(fname)
        self.assertEqual(len(table.domain.variables), 2)
        np.testing.assert_almost_equal(table.X.todense(),
                                       np.array([[1, 2], [1, 4]]))

    @with_file("""a,b\nc,b,a""")
    def test_variables_can_be_listed_in_any_order(self, fname):
        # The second row lists the names in a different order and adds "c";
        # the first row is expected to have 0 in the column for "c".
        table = read_basket(fname)
        self.assertEqual(len(table.domain.variables), 3)
        np.testing.assert_almost_equal(table.X.todense(),
                                       np.array([[1, 1, 0], [1, 1, 1]]))

    @with_file("""č,š,ž""")
    def test_handles_unicode(self, fname):
        table = read_basket(fname)
        self.assertEqual(len(table.domain.variables), 3)
        np.testing.assert_almost_equal(table.X.todense(),
                                       np.array([[1, 1, 1]]))

    @with_file("""a=4,"x"=1.0,"y"=2.0,b=5\n"x"=1.0""")
    def test_handles_quote(self, fname):
        table = read_basket(fname)
        self.assertEqual(len(table.domain.variables), 4)

    @with_file("""a,a,b\nb=2,b=3,c""")
    def test_sums_duplicates(self, fname):
        # Repeated names within one row are expected to be summed:
        # "a,a" -> 2 and "b=2,b=3" -> 5.
        table = read_basket(fname)
        np.testing.assert_array_equal(table.X.toarray(), [[2, 1, 0.],
                                                          [0, 5, 1]])

    def test_data_name(self):
        # The table name is expected to be the file name without extension.
        filename = os.path.join(os.path.dirname(__file__),
                                'datasets/iris_basket.basket')
        self.assertEqual(read_basket(filename).name, 'iris_basket')


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()