File: test_io.py

package info (click to toggle)
orange3 3.40.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,912 kB
  • sloc: python: 162,745; ansic: 622; makefile: 322; sh: 93; cpp: 77
file content (220 lines) | stat: -rw-r--r-- 8,271 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
import os
import shutil
import tempfile
import unittest
import warnings
from unittest.mock import Mock, patch

from Orange import data

from Orange.data.io import FileFormat, TabReader, CSVReader, PickleReader, ExcelReader
from Orange.data.table import get_sample_datasets_dir
from Orange.data import Table, StringVariable, Domain
from Orange.tests import test_dirname
from Orange.util import OrangeDeprecationWarning


class WildcardReader(FileFormat):
    """Minimal FileFormat subclass whose second extension is a wildcard pattern.

    Used by the extension-lookup tests in this module.
    """

    DESCRIPTION = "Dummy reader for testing extensions"
    EXTENSIONS = ('.wild', '.wild[0-9]')

    def read(self):
        """Placeholder implementation; returns None."""
        return None


class TestChooseReader(unittest.TestCase):
    """Check which reader ``FileFormat.get_reader`` returns for a file name."""

    def test_usual_extensions(self):
        expected_readers = (
            ("t.tab", TabReader),
            ("t.csv", CSVReader),
            ("t.pkl", PickleReader),
        )
        for filename, reader_cls in expected_readers:
            self.assertIsInstance(FileFormat.get_reader(filename), reader_cls)
        # An extension that is not expected to match any reader.
        self.assertRaises(
            OSError, FileFormat.get_reader, "test.undefined_extension")

    def test_wildcard_extension(self):
        for filename in ("t.wild", "t.wild2"):
            self.assertIsInstance(
                FileFormat.get_reader(filename), WildcardReader)
        # Extra characters after the single digit are expected to be rejected.
        self.assertRaises(OSError, FileFormat.get_reader, "t.wild2a")


class SameExtension(FileFormat):
    """Dummy format; its subclasses reuse the extension with other priorities."""

    DESCRIPTION = "Same extension, different priority"
    EXTENSIONS = ('.same_extension',)
    PRIORITY = 100

    def read(self):
        """Placeholder implementation; returns None."""
        return None


class SameExtensionPreferred(SameExtension):
    """Variant of SameExtension with the smallest PRIORITY value (90).

    TestMultipleSameExtension in this module expects ``get_reader`` to
    return an instance of this class for a '.same_extension' file.
    """
    PRIORITY = 90


class SameExtensionL(SameExtension):
    """Variant of SameExtension with the largest PRIORITY value (110)."""
    PRIORITY = 110


class TestMultipleSameExtension(unittest.TestCase):
    """Several formats declare '.same_extension'; check which one is chosen."""

    def test_find_reader(self):
        # Of the formats with PRIORITY 90, 100 and 110, the one with 90
        # is the expected result.
        self.assertIsInstance(
            FileFormat.get_reader("some.same_extension"),
            SameExtensionPreferred,
        )


class TestLocate(unittest.TestCase):
    """Tests for ``FileFormat.locate``."""

    def test_locate_sample_datasets(self):
        """Locate iris.tab in the sample datasets dir, with and without extension."""
        # Searching only the directory of this module is expected to fail.
        with self.assertRaises(OSError):
            FileFormat.locate("iris.tab",
                              search_dirs=[os.path.dirname(__file__)])
        iris = FileFormat.locate("iris.tab",
                                 search_dirs=[get_sample_datasets_dir()])
        self.assertEqual(os.path.basename(iris), "iris.tab")
        # test extension adding
        iris = FileFormat.locate("iris",
                                 search_dirs=[get_sample_datasets_dir()])
        self.assertEqual(os.path.basename(iris), "iris.tab")

    def test_locate_wildcard_extension(self):
        """Locate a file whose extension matches a wildcard pattern."""
        # The context manager removes the directory even when an assertion
        # below fails, so a failing run leaves nothing behind.
        with tempfile.TemporaryDirectory() as tempdir:
            # Nothing has been created yet, so the lookup must fail.
            with self.assertRaises(OSError):
                FileFormat.locate("t.wild9", search_dirs=[tempdir])
            fn = os.path.join(tempdir, "t.wild8")
            with open(fn, "wt", encoding="utf-8") as f:
                f.write("\n")
            location = FileFormat.locate("t.wild8", search_dirs=[tempdir])
            self.assertEqual(location, fn)
            # test extension adding
            location = FileFormat.locate("t", search_dirs=[tempdir])
            self.assertEqual(location, fn)


class TestReader(unittest.TestCase):
    """Reader and writer tests run with the test directory on the dataset path."""

    def setUp(self):
        # Add the test directory to the dataset search dirs; test_load_pickle
        # loads "datasets/..." files by relative name.
        data.table.dataset_dirs.append(test_dirname())

    def tearDown(self):
        # Undo the change made in setUp.
        data.table.dataset_dirs.remove(test_dirname())

    def test_open_bad_pickle(self):
        """
        Raise TypeError when PickleReader reads a pickle
        file without a table (where one is supposed to be).
        GH-2232
        """
        reader = PickleReader("")
        with patch("pickle.load", return_value=None):
            # NOTE(review): read() is called with an extra positional
            # argument; confirm the TypeError comes from the missing-table
            # check and not from the call signature.
            self.assertRaises(TypeError, reader.read, "foo")

    def test_empty_columns(self):
        """Can't read files with more columns than headers. GH-1417"""
        samplefile = """\
        a, b
        1, 0,
        1, 2,
        """
        # delete=False: the file is reopened by name after it has been closed.
        with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp:
            # Registered first so the file is removed even if writing,
            # reading or one of the assertions below fails.
            self.addCleanup(os.unlink, tmp.name)
            tmp.write(samplefile)
        with self.assertWarns(UserWarning) as cm:
            table = CSVReader(tmp.name).read()
        self.assertEqual(len(table.domain.attributes), 2)
        self.assertEqual(cm.warning.args[0], "Columns with no headers were removed.")

    def test_type_annotations(self):
        """``write`` forwards the annotation flag only if the format opts in."""
        class FooFormat(FileFormat):
            write_file = Mock()

        # OPTIONAL_TYPE_ANNOTATIONS not set on FooFormat: no third argument.
        FooFormat.write('test_file', None)
        FooFormat.write_file.assert_called_with('test_file', None)

        # Opted in: the flag is passed, True when the caller omits it.
        FooFormat.OPTIONAL_TYPE_ANNOTATIONS = True
        FooFormat.write('test_file', None)
        FooFormat.write_file.assert_called_with('test_file', None, True)

        FooFormat.write('test_file', None, False)
        FooFormat.write_file.assert_called_with('test_file', None, False)

        # Explicitly opted out: back to the two-argument call.
        FooFormat.OPTIONAL_TYPE_ANNOTATIONS = False
        FooFormat.write('test_file', None)
        FooFormat.write_file.assert_called_with('test_file', None)

    @patch('csv.DictWriter.writerow')
    def test_header_call(self, writer):
        """One header row without annotations, three rows with them."""
        iris = Table("iris")  # load once and reuse for both calls

        CSVReader.write_headers(writer, iris, False)
        self.assertEqual(len(writer.call_args_list), 1)

        writer.reset_mock()
        CSVReader.write_headers(writer, iris, True)
        self.assertEqual(len(writer.call_args_list), 3)

    def test_load_pickle(self):
        """
        Test that files pickled with older versions of Orange load
        correctly with the newer version of Orange.
        """
        with warnings.catch_warnings():
            # In unit tests on travis/github actions OrangeDeprecationWarning
            # is raised as an error. This statement disables that only for
            # this test: when unpickling a pickle created with a version
            # older than 3.27, the deprecated `ordered` parameter of
            # DiscreteVariable still appears and raises a deprecation warning.
            warnings.simplefilter('default', OrangeDeprecationWarning)
            pickles = (
                # Pickles created with Orange 3.20. The next version changed
                # variables.py (line 738), which broke backward
                # compatibility; these tests were introduced after the fix.
                "datasets/sailing-orange-3-20.pkl",
                "datasets/sailing-orange-3-20.pkl.gz",
                # Pickles created with Orange 3.21.
                "datasets/sailing-orange-3-21.pkl",
                "datasets/sailing-orange-3-21.pkl.gz",
            )
            # Load everything first, then check, as the checks are
            # independent of the loading order.
            tables = [Table(name) for name in pickles]

            examples_count = 20
            for table in tables:
                self.assertEqual(examples_count, len(table))

            attributes_count = 3
            for table in tables:
                self.assertEqual(attributes_count,
                                 len(table.domain.attributes))

    def test_update_origin(self):
        """
        Test that the origin attribute is changed if the path doesn't exist,
        for example when the file is moved to another computer. Only one
        scenario is tested here; all other scenarios are tested as part of
        the update_origin function tests.
        """
        with tempfile.TemporaryDirectory() as dir_name:
            os.mkdir(os.path.join(dir_name, "subdir"))

            var = StringVariable("Files")
            var.attributes["origin"] = "/a/b/c/d/subdir"
            table = Table.from_list(Domain([], metas=[var]), ["f1", "f2"])

            for reader in (CSVReader, TabReader, PickleReader, ExcelReader):
                dataset = os.path.join(dir_name, f"dataset{reader.EXTENSIONS[0]}")
                if reader is PickleReader:
                    # The pickle writer is called without with_annotations.
                    reader.write_file(dataset, table)
                else:
                    reader.write_file(dataset, table, with_annotations=True)

                # Keep the loaded table under its own name. Rebinding `table`
                # would make every later reader write a table whose origin
                # was already corrected, so only the first reader would
                # exercise the update.
                loaded = Table.from_file(dataset)
                self.assertEqual(
                    os.path.join(dir_name, "subdir"),
                    loaded.domain["Files"].attributes["origin"],
                )


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()