# Tests for Orange.data.io: choosing, locating and using file-format readers.
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
import os
import shutil
import tempfile
import unittest
import warnings
from unittest.mock import Mock, patch
from Orange import data
from Orange.data.io import FileFormat, TabReader, CSVReader, PickleReader, ExcelReader
from Orange.data.table import get_sample_datasets_dir
from Orange.data import Table, StringVariable, Domain
from Orange.tests import test_dirname
from Orange.util import OrangeDeprecationWarning
class WildcardReader(FileFormat):
    """Stub file format whose extension list contains a wildcard pattern."""

    DESCRIPTION = "Dummy reader for testing extensions"
    EXTENSIONS = ('.wild', '.wild[0-9]')

    def read(self):
        """Intentionally a no-op; returns None."""
        return None
class TestChooseReader(unittest.TestCase):
    """Check which reader ``FileFormat.get_reader`` returns for a file name."""

    def test_usual_extensions(self):
        """Known extensions give their reader; an unknown one raises OSError."""
        expected_readers = (
            ("t.tab", TabReader),
            ("t.csv", CSVReader),
            ("t.pkl", PickleReader),
        )
        for filename, reader_cls in expected_readers:
            self.assertIsInstance(FileFormat.get_reader(filename), reader_cls)
        self.assertRaises(
            OSError, FileFormat.get_reader, "test.undefined_extension")

    def test_wildcard_extension(self):
        """Names matching WildcardReader.EXTENSIONS resolve to that reader."""
        for filename in ("t.wild", "t.wild2"):
            self.assertIsInstance(
                FileFormat.get_reader(filename), WildcardReader)
        # "wild2a" has a trailing character beyond the '.wild[0-9]' pattern
        self.assertRaises(OSError, FileFormat.get_reader, "t.wild2a")
class SameExtension(FileFormat):
    """Stub format; its subclasses reuse the extension with other PRIORITY."""

    DESCRIPTION = "Same extension, different priority"
    EXTENSIONS = ('.same_extension',)
    PRIORITY = 100

    def read(self):
        """Intentionally a no-op; returns None."""
        return None
class SameExtensionPreferred(SameExtension):
    # Numerically lowest PRIORITY of the three same-extension stubs; this is
    # the class TestMultipleSameExtension expects get_reader to return.
    PRIORITY = 90
class SameExtensionL(SameExtension):
    # Numerically highest PRIORITY of the three same-extension stubs.
    PRIORITY = 110
class TestMultipleSameExtension(unittest.TestCase):
    """Several formats claim one extension; check which one is selected."""

    def test_find_reader(self):
        """The reader for '.same_extension' is a SameExtensionPreferred."""
        chosen = FileFormat.get_reader("some.same_extension")
        self.assertIsInstance(chosen, SameExtensionPreferred)
class TestLocate(unittest.TestCase):
    """Check how ``FileFormat.locate`` resolves names in search directories."""

    def test_locate_sample_datasets(self):
        """Find iris.tab among the sample datasets, with and without suffix."""
        # the directory of this test module is expected not to contain iris.tab
        with self.assertRaises(OSError):
            FileFormat.locate("iris.tab",
                              search_dirs=[os.path.dirname(__file__)])
        iris = FileFormat.locate("iris.tab",
                                 search_dirs=[get_sample_datasets_dir()])
        self.assertEqual(os.path.basename(iris), "iris.tab")
        # test extension adding
        iris = FileFormat.locate("iris",
                                 search_dirs=[get_sample_datasets_dir()])
        self.assertEqual(os.path.basename(iris), "iris.tab")

    def test_locate_wildcard_extension(self):
        """Locate a file whose extension matches a wildcard pattern."""
        # TemporaryDirectory removes the directory even when an assertion
        # below fails, so a failing run leaves nothing behind.
        with tempfile.TemporaryDirectory() as tempdir:
            # nothing has been created yet, so the lookup must fail
            with self.assertRaises(OSError):
                FileFormat.locate("t.wild9", search_dirs=[tempdir])
            fn = os.path.join(tempdir, "t.wild8")
            with open(fn, "wt", encoding="utf-8") as f:
                f.write("\n")
            location = FileFormat.locate("t.wild8", search_dirs=[tempdir])
            self.assertEqual(location, fn)
            # test extension adding
            location = FileFormat.locate("t", search_dirs=[tempdir])
            self.assertEqual(location, fn)
class TestReader(unittest.TestCase):
    """Read and write tables through the concrete ``FileFormat`` readers."""

    def setUp(self):
        # presumably lets the relative "datasets/..." names used below resolve
        data.table.dataset_dirs.append(test_dirname())

    def tearDown(self):
        data.table.dataset_dirs.remove(test_dirname())

    def test_open_bad_pickle(self):
        """
        Raise TypeError when PickleReader reads a pickle
        file without a table (where one is supposed to be).
        GH-2232
        """
        reader = PickleReader("")
        # NOTE(review): "foo" is forwarded to reader.read as a positional
        # argument; confirm the TypeError comes from the missing table and
        # not from the call signature itself.
        with patch("pickle.load", return_value=None):
            self.assertRaises(TypeError, reader.read, "foo")

    def test_empty_columns(self):
        """Can't read files with more columns than headers. GH-1417"""
        # The literal is kept flush-left so its content is unchanged.
        samplefile = """\
a, b
1, 0,
1, 2,
"""
        with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp:
            tmp.write(samplefile)
        try:
            with self.assertWarns(UserWarning) as cm:
                table = CSVReader(tmp.name).read()
        finally:
            # delete=False: remove the file ourselves, also when reading fails
            os.unlink(tmp.name)
        self.assertEqual(len(table.domain.attributes), 2)
        self.assertEqual(cm.warning.args[0],
                         "Columns with no headers were removed.")

    def test_type_annotations(self):
        """write() forwards the annotation flag only when the format opts in."""
        class FooFormat(FileFormat):
            write_file = Mock()

        FooFormat.write('test_file', None)
        FooFormat.write_file.assert_called_with('test_file', None)

        FooFormat.OPTIONAL_TYPE_ANNOTATIONS = True
        FooFormat.write('test_file', None)
        FooFormat.write_file.assert_called_with('test_file', None, True)

        FooFormat.write('test_file', None, False)
        FooFormat.write_file.assert_called_with('test_file', None, False)

        FooFormat.OPTIONAL_TYPE_ANNOTATIONS = False
        FooFormat.write('test_file', None)
        FooFormat.write_file.assert_called_with('test_file', None)

    @patch('csv.DictWriter.writerow')
    def test_header_call(self, writer):
        """write_headers calls the writer once, or three times if annotated."""
        CSVReader.write_headers(writer, Table("iris"), False)
        self.assertEqual(len(writer.call_args_list), 1)

        writer.reset_mock()

        CSVReader.write_headers(writer, Table("iris"), True)
        self.assertEqual(len(writer.call_args_list), 3)

    def test_load_pickle(self):
        """
        Test whether files pickled in older versions of Orange load
        correctly with a newer version of Orange.
        """
        with warnings.catch_warnings():
            # In unittests on travis/github actions OrangeDeprecationWarning
            # is raised as an error. This statement disables that for this
            # test only: when unpickling a pickle created with a version older
            # than 3.27, the deprecated `ordered` parameter of
            # DiscreteVariable still appears and raises a deprecation warning.
            warnings.simplefilter('default', OrangeDeprecationWarning)

            # load pickles created with Orange 3.20
            # in next version there is a change in variables.py - line 738
            # which broke back compatibility - tests introduced after the fix
            data1 = Table("datasets/sailing-orange-3-20.pkl")
            data2 = Table("datasets/sailing-orange-3-20.pkl.gz")

            # load pickles created with Orange 3.21
            data3 = Table("datasets/sailing-orange-3-21.pkl")
            data4 = Table("datasets/sailing-orange-3-21.pkl.gz")

        examples_count = 20
        self.assertEqual(examples_count, len(data1))
        self.assertEqual(examples_count, len(data2))
        self.assertEqual(examples_count, len(data3))
        self.assertEqual(examples_count, len(data4))

        attributes_count = 3
        self.assertEqual(attributes_count, len(data1.domain.attributes))
        self.assertEqual(attributes_count, len(data2.domain.attributes))
        self.assertEqual(attributes_count, len(data3.domain.attributes))
        self.assertEqual(attributes_count, len(data4.domain.attributes))

    def test_update_origin(self):
        """
        Test that the origin attribute is changed if the path doesn't exist,
        for example when a file is moved to another computer. Only one
        scenario is tested here; all other scenarios are tested as part of
        the update_origin function tests.
        """
        with tempfile.TemporaryDirectory() as dir_name:
            os.mkdir(os.path.join(dir_name, "subdir"))
            var = StringVariable("Files")
            var.attributes["origin"] = "/a/b/c/d/subdir"
            table = Table.from_list(Domain([], metas=[var]), ["f1", "f2"])

            for reader in (CSVReader, TabReader, PickleReader, ExcelReader):
                with self.subTest(reader=reader.__name__):
                    dataset = os.path.join(
                        dir_name, f"dataset{reader.EXTENSIONS[0]}")
                    if reader is PickleReader:
                        reader.write_file(dataset, table)
                    else:
                        reader.write_file(
                            dataset, table, with_annotations=True)

                    # Use a separate name for the loaded table: rebinding
                    # `table` would hand the next reader the table loaded
                    # here instead of the one carrying the stale origin.
                    loaded = Table.from_file(dataset)
                    self.assertEqual(
                        os.path.join(dir_name, "subdir"),
                        loaded.domain["Files"].attributes["origin"],
                    )
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|