1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
|
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
import unittest
from tempfile import NamedTemporaryFile
import os
import warnings
from Orange.data import Table, ContinuousVariable, DiscreteVariable
from Orange.data.io import CSVReader
from Orange.tests import test_filename, named_file
# In-memory fixtures that the tests below write to temporary files and read
# back with CSVReader.

# Tab-separated data (the "\t" escapes become real tab characters) with a
# header row naming three columns and two data rows.
tab_file = """\
Feature 1\tFeature 2\tFeature 3
1.0 \t1.3 \t5
2.0 \t42 \t7
"""
# The same header and values, comma-separated; note the spaces that follow
# the commas.
csv_file = """\
Feature 1, Feature 2,Feature 3
1.0, 1.3, 5
2.0, 42, 7
"""
# Tab-separated data rows only, without a header row ("nh").
tab_file_nh = """\
1.0 \t1.3 \t5
2.0 \t42 \t7
"""
# Comma-separated data rows only, without a header row ("nh").
csv_file_nh = """\
1.0, 1.3, 5
2.0, 42, 7
"""
# Two-column file whose fifth line holds the non-numeric value "g" in the
# second column; reading it is expected to raise a ValueError that mentions
# "line 5, column 2".
# NOTE(review): lines 2 and 3 look like Orange's type ("d", "c") and flag
# rows of a three-line header -- confirm against the reader's documentation.
noncont_marked_cont = '''\
a,b
d,c
,
e,1
f,g
'''
# Two-column file with a header row in which each column contains one "?"
# entry; the test using it expects column A to be read as continuous and
# column B as discrete.
csv_file_missing = """\
A,B
1,A
2,B
3,A
?,B
5,?
"""
class TestTabReader(unittest.TestCase):
    """Tests for reading delimited text files with ``CSVReader``.

    Most tests write an in-memory fixture to a temporary file, read it back
    by name and check the resulting table.  Every test that creates a
    temporary file removes it again, even when the test fails.
    """

    @staticmethod
    def _write_temp_file(contents):
        """Write `contents` to a new temporary text file and return its name.

        The file is created with ``delete=False`` and is closed before this
        method returns, so the data is flushed and the file can be reopened
        by name.  The caller is responsible for removing the file.
        """
        file = NamedTemporaryFile("wt", delete=False)
        try:
            with file:
                file.write(contents)
        except Exception:
            # Do not leave a half-written file behind if writing failed.
            os.remove(file.name)
            raise
        return file.name

    def read_easy(self, s, name):
        """Read the contents `s` from a temporary file and check its columns.

        The table must have exactly three variables called ``name + "1"``,
        ``name + "2"`` and ``name + "3"``; the first is expected to be
        discrete and the other two continuous.

        Args:
            s: text to write to the temporary file.
            name: common prefix of the expected variable names.
        """
        filename = self._write_temp_file(s)
        try:
            table = CSVReader(filename).read()
            f1, f2, f3 = table.domain.variables
            expected = (
                (f1, DiscreteVariable, "1"),
                (f2, ContinuousVariable, "2"),
                (f3, ContinuousVariable, "3"),
            )
            for var, var_type, suffix in expected:
                self.assertIsInstance(var, var_type)
                self.assertEqual(var.name, name + suffix)
        finally:
            os.remove(filename)

    def test_read_tab(self):
        # The header-less file is checked against the same names as the one
        # with a header.
        self.read_easy(tab_file, "Feature ")
        self.read_easy(tab_file_nh, "Feature ")

    def test_read_csv(self):
        self.read_easy(csv_file, "Feature ")
        self.read_easy(csv_file_nh, "Feature ")

    def test_read_csv_with_na(self):
        filename = self._write_temp_file(csv_file_missing)
        try:
            table = CSVReader(filename).read()
        finally:
            # Remove the file even if reading raised.
            os.unlink(filename)
        f1, f2 = table.domain.variables
        self.assertIsInstance(f1, ContinuousVariable)
        self.assertIsInstance(f2, DiscreteVariable)

    def test_read_nonutf8_encoding(self):
        with self.assertRaises(ValueError):
            # Turn every warning into an exception for the duration of the
            # load; catch_warnings restores the previous filters afterwards.
            with warnings.catch_warnings():
                warnings.filterwarnings('error')
                Table(test_filename('datasets/invalid_characters.tab'))

    def test_noncontinous_marked_continuous(self):
        filename = self._write_temp_file(noncont_marked_cont)
        try:
            with self.assertRaises(ValueError) as cm:
                CSVReader(filename).read()
            # The error message must point at the offending cell.
            self.assertIn('line 5, column 2', cm.exception.args[0])
        finally:
            os.remove(filename)

    def test_pr1734(self):
        # A continuous variable named 'foo' is created before reading a file
        # whose column 'foo' is followed by a 'time' line; the test passes
        # if reading does not raise.
        ContinuousVariable('foo')
        # NOTE(review): if the original fixture had an empty flags line
        # between 'time' and the data row, it is not present here -- verify.
        filename = self._write_temp_file('foo\ntime\n123123123\n')
        try:
            CSVReader(filename).read()
        finally:
            os.remove(filename)

    def test_csv_sniffer(self):
        # GH-2785
        reader = CSVReader(test_filename('datasets/test_asn_data_working.csv'))
        data = reader.read()
        self.assertEqual(len(data), 8)
        self.assertEqual(
            len(data.domain.variables) + len(data.domain.metas), 15)

    def test_utf_8_sig(self):
        # The file is written with the "utf-8-sig" encoding; the first
        # variable name must still come out as plain "Feature 1".
        with named_file(csv_file, encoding="utf-8-sig") as f:
            reader = CSVReader(f)
            data = reader.read()
            self.assertEqual(data.domain[0].name, "Feature 1")
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|