1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
# -*- coding: utf-8 -*-
"""
Unit tests for the pattern score.
Author: Gertjan van den Burg
"""
import unittest
from clevercsv import detect_pattern
from clevercsv.dialect import SimpleDialect
class PatternTestCase(unittest.TestCase):
    """Tests for ``detect_pattern``: abstraction, empty filling, pattern score.

    The abstraction strings asserted below are built from the letters
    ``C``, ``D`` and ``R`` (presumably cell, delimiter and row break --
    confirm against ``detect_pattern.make_abstraction``).
    """

    # Sample text shared by the three pattern-score tests; the "theta_N"
    # comments on those tests refer to dialects from the paper.
    _SCORE_SAMPLE = (
        "7,5; Mon, Jan 12;6,40\n100; Fri, Mar 21;8,23\n8,2; Thu, Sep 17;"
        '2,71\n538,0;;7,26\n"NA"; Wed, Oct 4;6,93'
    )

    # ------------------------------------------------------------------
    # Abstraction tests
    # ------------------------------------------------------------------

    def test_abstraction_1(self):
        # Plain single row, no quoting or escaping configured.
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="")
        actual = detect_pattern.make_abstraction("A,B,C", dialect)
        self.assertEqual("CDCDC", actual)

    def test_abstraction_2(self):
        # Row ending in a delimiter, "\r" row breaks, trailing "\r".
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="")
        actual = detect_pattern.make_abstraction("A,\rA,A,A\r", dialect)
        self.assertEqual("CDCRCDCDC", actual)

    def test_abstraction_3(self):
        # Mixed "\n", "\r" and "\r\n" row breaks in one input.
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="")
        actual = detect_pattern.make_abstraction(
            "a,a,\n,a,a\ra,a,a\r\n", dialect
        )
        self.assertEqual("CDCDCRCDCDCRCDCDC", actual)

    def test_abstraction_4(self):
        # Doubled quote characters inside a quoted cell.
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
        actual = detect_pattern.make_abstraction(
            'a,"bc""d""e""f""a",\r\n', dialect
        )
        self.assertEqual("CDCDC", actual)

    def test_abstraction_5(self):
        # Doubled quotes, a delimiter and escaped quotes in a quoted cell.
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="|")
        actual = detect_pattern.make_abstraction('a,"bc""d"",|"f|""', dialect)
        self.assertEqual("CDC", actual)

    def test_abstraction_6(self):
        # Input consisting of delimiters only.
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="")
        actual = detect_pattern.make_abstraction(",,,", dialect)
        self.assertEqual("CDCDCDC", actual)

    def test_abstraction_7(self):
        # Empty quoted cell between otherwise empty cells.
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
        actual = detect_pattern.make_abstraction(',"",,', dialect)
        self.assertEqual("CDCDCDC", actual)

    def test_abstraction_8(self):
        # Same input as test_abstraction_7 but terminated by "\r\n".
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
        actual = detect_pattern.make_abstraction(',"",,\r\n', dialect)
        self.assertEqual("CDCDCDC", actual)

    # ------------------------------------------------------------------
    # Escape char tests
    # ------------------------------------------------------------------

    def test_abstraction_9(self):
        # Escape character directly before a delimiter, no quote character.
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="|")
        actual = detect_pattern.make_abstraction("A,B|,C", dialect)
        self.assertEqual("CDC", actual)

    def test_abstraction_10(self):
        # Escape character before a quote inside a quoted cell.
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="|")
        actual = detect_pattern.make_abstraction('A,"B,C|"D"', dialect)
        self.assertEqual("CDC", actual)

    def test_abstraction_11(self):
        # Escape character before an ordinary character.
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="|")
        actual = detect_pattern.make_abstraction("a,|b,c", dialect)
        self.assertEqual("CDCDC", actual)

    def test_abstraction_12(self):
        # Lower-case variant of test_abstraction_9.
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="|")
        actual = detect_pattern.make_abstraction("a,b|,c", dialect)
        self.assertEqual("CDC", actual)

    def test_abstraction_13(self):
        # Escape character before a quote that is followed by another quote.
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="|")
        actual = detect_pattern.make_abstraction('a,"b,c|""', dialect)
        self.assertEqual("CDC", actual)

    def test_abstraction_14(self):
        # Two consecutive escape characters, no quote character.
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="|")
        actual = detect_pattern.make_abstraction("a,b||c", dialect)
        self.assertEqual("CDC", actual)

    def test_abstraction_15(self):
        # Escaped quotes and a doubled escape character in a quoted cell.
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="|")
        actual = detect_pattern.make_abstraction('a,"b|"c||d|"e"', dialect)
        self.assertEqual("CDC", actual)

    def test_abstraction_16(self):
        # Like test_abstraction_15, but followed by a second quoted cell.
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="|")
        actual = detect_pattern.make_abstraction('a,"b|"c||d","e"', dialect)
        self.assertEqual("CDCDC", actual)

    # ------------------------------------------------------------------
    # Fill empties
    # ------------------------------------------------------------------

    def test_fill_empties_1(self):
        # A delimiter-only abstraction gets a C on both sides of every D.
        actual = detect_pattern.fill_empties("DDD")
        self.assertEqual("CDCDCDC", actual)

    # ------------------------------------------------------------------
    # Pattern Score tests
    # ------------------------------------------------------------------

    def test_pattern_score_1(self):
        # theta_1 from paper
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="")
        score = detect_pattern.pattern_score(self._SCORE_SAMPLE, dialect)
        self.assertAlmostEqual(7 / 4, score)

    def test_pattern_score_2(self):
        # theta_2 from paper
        dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="")
        score = detect_pattern.pattern_score(self._SCORE_SAMPLE, dialect)
        self.assertAlmostEqual(10 / 3, score)

    def test_pattern_score_3(self):
        # theta_3 from paper
        dialect = SimpleDialect(delimiter=";", quotechar='"', escapechar="")
        score = detect_pattern.pattern_score(self._SCORE_SAMPLE, dialect)
        self.assertAlmostEqual(10 / 3, score)
if __name__ == "__main__":
    # Run the tests in this module when the file is executed as a script.
    unittest.main()
|