1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
#!/usr/bin/env python3
import base, unittest
from ptk.regex import RegexTokenizer, RegexParser, RegularExpression, LitteralCharacterClass, \
RegexParseError, buildRegex
class RegexParserTestCaseMixin:
    """Regex parser test cases, meant to be mixed into a unittest.TestCase.

    Comparing RegularExpression objects for equality is awkward, so each test
    builds an expression from a pattern and checks which strings it matches.

    Subclasses may override ``_parse`` and ``_match`` to convert the pattern
    and the subject string before they are used.

    Test methods carry comments rather than docstrings so that unittest keeps
    reporting them by method name.
    """

    def _parse(self, rx):
        """Build a regular expression object from the pattern ``rx``."""
        return buildRegex(rx)

    def _match(self, rx, s):
        """Return ``rx.match(s)``; the tests only check its truthiness."""
        return rx.match(s)

    def test_newline(self):
        # The two-character escape "backslash n" must match a real newline.
        rx = self._parse(r'\n')
        self.assertTrue(self._match(rx, '\n'))

    def test_concat(self):
        # Concatenation matches the exact sequence: no prefix, no extra tail.
        rx = self._parse('ab')
        self.assertFalse(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'ab'))
        self.assertFalse(self._match(rx, 'abc'))

    def test_union(self):
        # Alternation matches either operand, but not both in sequence.
        rx = self._parse('a|b')
        self.assertTrue(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'b'))
        self.assertFalse(self._match(rx, 'ab'))
        self.assertFalse(self._match(rx, 'c'))

    def test_kleene(self):
        # '*' accepts zero or more repetitions, including the empty string.
        rx = self._parse('a*')
        self.assertTrue(self._match(rx, ''))
        self.assertTrue(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'aa'))
        self.assertFalse(self._match(rx, 'b'))

    def test_closure(self):
        # '+' requires at least one repetition.
        rx = self._parse('a+')
        self.assertFalse(self._match(rx, ''))
        self.assertTrue(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'aa'))
        self.assertFalse(self._match(rx, 'b'))

    def test_exp_single(self):
        # '{2}' accepts exactly two repetitions.
        rx = self._parse('a{2}')
        self.assertFalse(self._match(rx, ''))
        self.assertFalse(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'aa'))
        self.assertFalse(self._match(rx, 'aaa'))

    def test_exp_both(self):
        # '{2-3}' accepts two or three repetitions (bounds separated by '-').
        rx = self._parse('a{2-3}')
        self.assertFalse(self._match(rx, ''))
        self.assertFalse(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'aa'))
        self.assertTrue(self._match(rx, 'aaa'))
        self.assertFalse(self._match(rx, 'aaaa'))

    def test_class(self):
        # A character range matches every character inside it and none outside.
        rx = self._parse('[a-c]')
        self.assertTrue(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'b'))
        self.assertTrue(self._match(rx, 'c'))
        self.assertFalse(self._match(rx, 'd'))

    def test_any(self):
        # '.' matches an ordinary character but not a newline.
        rx = self._parse('.')
        self.assertTrue(self._match(rx, 'U'))
        self.assertFalse(self._match(rx, '\n'))

    def test_prio_1(self):
        # '*' binds tighter than '|': the pattern reads as a|(b*).
        rx = self._parse('a|b*')
        self.assertTrue(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'b'))
        self.assertTrue(self._match(rx, 'bb'))
        self.assertFalse(self._match(rx, 'ab'))

    def test_prio_2(self):
        # '*' binds tighter than concatenation: the pattern reads as a(b*).
        rx = self._parse('ab*')
        self.assertTrue(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'ab'))
        self.assertTrue(self._match(rx, 'abb'))
        self.assertFalse(self._match(rx, 'abab'))

    def test_prio_3(self):
        # Concatenation binds tighter than '|': the pattern reads as a|(bc).
        rx = self._parse('a|bc')
        self.assertTrue(self._match(rx, 'a'))
        self.assertTrue(self._match(rx, 'bc'))
        self.assertFalse(self._match(rx, 'ac'))

    def test_paren(self):
        # Parentheses group a sub-expression so '*' repeats the whole group.
        rx = self._parse('(ab)*')
        self.assertTrue(self._match(rx, 'ab'))
        self.assertTrue(self._match(rx, 'abab'))
        self.assertFalse(self._match(rx, 'abb'))

    def test_crlf(self):
        # Two consecutive escapes match a carriage return followed by a newline.
        rx = self._parse(r'\r\n')
        self.assertTrue(self._match(rx, '\r\n'))

    def test_extra_tokens(self):
        # A dangling '(' after a valid prefix must raise RegexParseError.
        with self.assertRaises(RegexParseError):
            self._parse('ab(')

    def test_missing_paren(self):
        # A group that is never closed must raise RegexParseError.
        with self.assertRaises(RegexParseError):
            self._parse('(a')
class RegexParserUnicodeTestCase(RegexParserTestCaseMixin, unittest.TestCase):
    """Run the mixin's parser tests as-is, with patterns and subjects passed unchanged."""
class RegexParserBytesTestCase(RegexParserTestCaseMixin, unittest.TestCase):
    """Run the mixin's parser tests with str inputs converted to UTF-8 bytes."""

    def _parse(self, rx):
        """Encode a str pattern to UTF-8 bytes, then defer to the parent ``_parse``."""
        pattern = rx.encode('UTF-8') if isinstance(rx, str) else rx
        return super()._parse(pattern)

    def _match(self, rx, s):
        """Encode a str subject to UTF-8 bytes, then defer to the parent ``_match``."""
        subject = s.encode('UTF-8') if isinstance(s, str) else s
        return super()._match(rx, subject)
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|