File: test_decode_data.py

package info (click to toggle)
liac-arff 2.5.0-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 356 kB
  • sloc: python: 2,706; makefile: 126; sh: 23
file content (127 lines) | stat: -rw-r--r-- 5,398 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import unittest
import textwrap
import arff


class TestDecodeData(unittest.TestCase):
    """Functional checks for the logic of _parse_values.

    Every attribute is declared as STRING so that the tests exercise only
    how a data row is split into values, not how values are converted.
    """

    def _load(self, data, n_attribs=1):
        """Decode ``data`` as the @DATA section of a generated ARFF document.

        The document declares ``n_attribs`` STRING attributes named ``x0``,
        ``x1``, ... and the ``'data'`` entry of the ``arff.load`` result is
        returned.
        """
        attribute_lines = ['@ATTRIBUTE x%d STRING' % idx
                           for idx in range(n_attribs)]
        template = textwrap.dedent(u'''
        @RELATION testing

        {attribs}

        @DATA
        {data}
        ''')
        document = template.format(attribs='\n'.join(attribute_lines),
                                   data=data)
        decoded = arff.load(document)
        return decoded['data']

    def assertLoadsAs(self, data, expected, n_attribs=1):
        """Assert that decoding ``data`` yields exactly the list ``expected``."""
        self.assertListEqual(self._load(data, n_attribs), expected)

    def _assert_each_raises(self, error, payloads):
        # Each payload, decoded on its own as a single-attribute data
        # section, has to raise ``error``.
        for payload in payloads:
            with self.assertRaises(error):
                self._load(payload)

    def test_quotes(self):
        # Either quote character may wrap a value that contains a comma.
        for quoted in ("""'ENACT.NOGOPMAJ,2017'""",
                       '''"ENACT.NOGOPMAJ,2017"'''):
            self.assertLoadsAs(quoted, [['ENACT.NOGOPMAJ,2017']])

        # Several quoted values in one row, with spaces around separators.
        four_columns = ''' 'A','B' , '"','C,D' '''
        self.assertLoadsAs(four_columns,
                           [['A', 'B', '"', 'C,D']],
                           n_attribs=4)

    def test_quotes_sparse(self):
        # Same inputs as test_quotes, written in the sparse {index value} form.
        for quoted in ("""{0 'ENACT.NOGOPMAJ,2017'}""",
                       '''{0 "ENACT.NOGOPMAJ,2017"}'''):
            self.assertLoadsAs(quoted, [['ENACT.NOGOPMAJ,2017']])

        four_columns = '''{ 0 'A',1 'B' ,2 '"',3 'C,D' }'''
        self.assertLoadsAs(four_columns,
                           [['A', 'B', '"', 'C,D']],
                           n_attribs=4)

    def test_escapes(self):
        # (raw data row, expected decoded rows, number of attributes)
        cases = [
            (r''' '\'' ''', [["'"]], 1),
            (r''' '\\' ''', [["\\"]], 1),
            (r''' '\\\'' ''', [["\\'"]], 1),
            ('''"\\\\,",",\\\\"''', [['\\,', ',\\']], 2),
            ('"\\n"', [["\n"]], 1),
            ('"\\r"', [["\r"]], 1),
            ('"\\t"', [["\t"]], 1),
            ('"\\b"', [["\b"]], 1),
            ('"\\f"', [["\f"]], 1),
            ('"\\0"', [["\0"]], 1),
            ('"\\01"', [["\01"]], 1),
            ('"\\011"', [["\t"]], 1),
            ('"\\u123a"', [[u"\u123a"]], 1),
            ('"abc\\0abc"', [["abc\0abc"]], 1),
            ('"abc\\01abc"', [["abc\01abc"]], 1),
            ('"abc\\011abc"', [["abc\tabc"]], 1),
            ('"abc\\u123aabc"', [[u"abc\u123aabc"]], 1),
            ('"\\%"', [["%"]], 1),  # legacy support
        ]
        for raw, rows, width in cases:
            self.assertLoadsAs(raw, rows, n_attribs=width)

    def test_bad_escapes(self):
        malformed = (
            r" '\x00' ",
            r" '\u1' ",
            r" '\uzzzz' ",
        )
        # case sensitive
        wrong_case = (
            r" '\N' ",
            r" '\T' ",
        )
        self._assert_each_raises(ValueError, malformed + wrong_case)

    def test_escapes_sparse(self):
        # (raw sparse data row, expected decoded rows)
        cases = [
            (r''' {0 '\''} ''', [["'"]]),
            (r''' {0 '\\'} ''', [["\\"]]),
            (r''' {0 '\\\''} ''', [["\\'"]]),
        ]
        for raw, rows in cases:
            self.assertLoadsAs(raw, rows)

    def test_bad_quotes(self):
        # Unbalanced, stray or escaped quote characters.
        self._assert_each_raises(arff.BadLayout, (
            r" \'A' ",
            r" \'A ",
            " 'A ",
            ' "A ',
            ' A" ',
            " A' ",
            r" 'A\' ",
            r' "A\" ',
        ))

    def test_bad_quotes_sparse(self):
        # The test_bad_quotes inputs wrapped in the sparse {index value} form.
        self._assert_each_raises(arff.BadLayout, (
            r" {0 \'A' }",
            r" {0 \'A }",
            " {0 'A }",
            ' {0 "A }',
            ' {0 A" }',
            " {0 A' }",
            r" {0 'A\' }",
            r' {0 "A\" }',
        ))

    def test_multiple_values(self):
        # Two values separated only by a space, with one attribute declared.
        self._assert_each_raises(arff.BadLayout, (
            r" A B ",
            r" 5 6 ",
            r" '5' '6' ",
        ))

    def test_multiple_values_sparse(self):
        # Two values following a single sparse index.
        self._assert_each_raises(arff.BadLayout, (
            r" {0 A B }",
            r" {0 5 6 }",
            r" {0 '5' '6' }",
        ))

    def test_internal_brace(self):
        # Braces mixed with dense values, and braces left unbalanced.
        self._assert_each_raises(arff.BadLayout, (
            r" 0 {0 A }",
            r" 0, {0 A }",
            r" {0 A } 0",
            r" {0 A }, 0",
            r" {0",
            r" 0}",
        ))

    # TODO: more tests of whitespace