1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
|
"""Utilities for diffing test files and their output"""
import codecs
import difflib
import re
from cram._encoding import b
__all__ = ['esc', 'glob', 'regex', 'unified_diff']
def _regex(pattern, s):
"""Match a regular expression or return False if invalid.
>>> from cram._encoding import b
>>> [bool(_regex(r, b('foobar'))) for r in (b('foo.*'), b('***'))]
[True, False]
"""
try:
return re.match(pattern + b(r'\Z'), s)
except re.error:
return False
def _glob(el, l):
r"""Match a glob-like pattern.
The only supported special characters are * and ?. Escaping is
supported.
>>> from cram._encoding import b
>>> bool(_glob(b(r'\* \\ \? fo?b*'), b('* \\ ? foobar')))
True
"""
i, n = 0, len(el)
res = b('')
while i < n:
c = el[i:i + 1]
i += 1
if c == b('\\') and el[i] in b('*?\\'):
res += el[i - 1:i + 1]
i += 1
elif c == b('*'):
res += b('.*')
elif c == b('?'):
res += b('.')
else:
res += re.escape(c)
return _regex(res, l)
def _matchannotation(keyword, matchfunc, el, l):
"""Apply match function based on annotation keyword"""
ann = b(' (%s)\n' % keyword)
return el.endswith(ann) and matchfunc(el[:-len(ann)], l[:-1])
def regex(el, l):
"""Apply a regular expression match to a line annotated with '(re)'"""
return _matchannotation('re', _regex, el, l)
def glob(el, l):
"""Apply a glob match to a line annotated with '(glob)'"""
return _matchannotation('glob', _glob, el, l)
def esc(el, l):
"""Apply an escape match to a line annotated with '(esc)'"""
ann = b(' (esc)\n')
if el.endswith(ann):
el = codecs.escape_decode(el[:-len(ann)])[0] + b('\n')
if el == l:
return True
if l.endswith(ann):
l = codecs.escape_decode(l[:-len(ann)])[0] + b('\n')
return el == l
class _SequenceMatcher(difflib.SequenceMatcher, object):
"""Like difflib.SequenceMatcher, but supports custom match functions"""
def __init__(self, *args, **kwargs):
self._matchers = kwargs.pop('matchers', [])
super(_SequenceMatcher, self).__init__(*args, **kwargs)
def _match(self, el, l):
"""Tests for matching lines using custom matchers"""
for matcher in self._matchers:
if matcher(el, l):
return True
return False
def find_longest_match(self, alo, ahi, blo, bhi):
"""Find longest matching block in a[alo:ahi] and b[blo:bhi]"""
# SequenceMatcher uses find_longest_match() to slowly whittle down
# the differences between a and b until it has each matching block.
# Because of this, we can end up doing the same matches many times.
matches = []
for n, (el, line) in enumerate(zip(self.a[alo:ahi], self.b[blo:bhi])):
if el != line and self._match(el, line):
# This fools the superclass's method into thinking that the
# regex/glob in a is identical to b by replacing a's line (the
# expected output) with b's line (the actual output).
self.a[alo + n] = line
matches.append((n, el))
ret = super(_SequenceMatcher, self).find_longest_match(alo, ahi,
blo, bhi)
# Restore the lines replaced above. Otherwise, the diff output
# would seem to imply that the tests never had any regexes/globs.
for n, el in matches:
self.a[alo + n] = el
return ret
def unified_diff(l1, l2, fromfile=b(''), tofile=b(''), fromfiledate=b(''),
tofiledate=b(''), n=3, lineterm=b('\n'), matchers=None):
r"""Compare two sequences of lines; generate the delta as a unified diff.
This is like difflib.unified_diff(), but allows custom matchers.
>>> from cram._encoding import b
>>> l1 = [b('a\n'), b('? (glob)\n')]
>>> l2 = [b('a\n'), b('b\n')]
>>> (list(unified_diff(l1, l2, b('f1'), b('f2'), b('1970-01-01'),
... b('1970-01-02'))) ==
... [b('--- f1\t1970-01-01\n'), b('+++ f2\t1970-01-02\n'),
... b('@@ -1,2 +1,2 @@\n'), b(' a\n'), b('-? (glob)\n'), b('+b\n')])
True
>>> from cram._diff import glob
>>> list(unified_diff(l1, l2, matchers=[glob]))
[]
"""
if matchers is None:
matchers = []
started = False
matcher = _SequenceMatcher(None, l1, l2, matchers=matchers)
for group in matcher.get_grouped_opcodes(n):
if not started:
if fromfiledate:
fromdate = b('\t') + fromfiledate
else:
fromdate = b('')
if tofiledate:
todate = b('\t') + tofiledate
else:
todate = b('')
yield b('--- ') + fromfile + fromdate + lineterm
yield b('+++ ') + tofile + todate + lineterm
started = True
i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
yield (b("@@ -%d,%d +%d,%d @@" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1)) +
lineterm)
for tag, i1, i2, j1, j2 in group:
if tag == 'equal':
for line in l1[i1:i2]:
yield b(' ') + line
continue
if tag == 'replace' or tag == 'delete':
for line in l1[i1:i2]:
yield b('-') + line
if tag == 'replace' or tag == 'insert':
for line in l2[j1:j2]:
yield b('+') + line
|