File: _diff.py

package info (click to toggle)
pbbam 0.19.0%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 10,280 kB
  • sloc: cpp: 50,244; python: 1,371; ansic: 949; xml: 903; sh: 317; makefile: 185
file content (158 lines) | stat: -rw-r--r-- 5,630 bytes parent folder | download | duplicates (30)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""Utilities for diffing test files and their output"""

import codecs
import difflib
import re

from cram._encoding import b

# Names exported by ``from cram._diff import *``: the three annotation
# matchers (esc, glob, regex) and the matcher-aware unified_diff().
__all__ = ['esc', 'glob', 'regex', 'unified_diff']

def _regex(pattern, s):
    """Match s against a regular expression, tolerating bad patterns.

    An end-of-string anchor is appended to the pattern before it is
    handed to re.match(). The result is the match object, None when s
    does not match, or False when the pattern fails to compile.

    >>> from cram._encoding import b
    >>> [bool(_regex(r, b('foobar'))) for r in (b('foo.*'), b('***'))]
    [True, False]
    """
    anchored = pattern + b(r'\Z')
    try:
        result = re.match(anchored, s)
    except re.error:
        # An invalid expression simply counts as "no match".
        result = False
    return result

def _glob(el, l):
    r"""Match a glob-like pattern.

    The only supported special characters are * and ?. Escaping is
    supported: a backslash followed by *, ? or another backslash stands
    for that literal character. A backslash in any other position
    (including at the very end of the pattern) is matched literally.

    The pattern el is translated into a regular expression and applied
    to l with _regex(), whose result is returned.

    >>> from cram._encoding import b
    >>> bool(_glob(b(r'\* \\ \? fo?b*'), b('* \\ ? foobar')))
    True
    >>> bool(_glob(b('foo\\'), b('foo\\')))
    True
    """
    backslash, star, question = b('\\'), b('*'), b('?')
    escapable = b('*?\\')
    i, n = 0, len(el)
    parts = []
    while i < n:
        c = el[i:i + 1]
        i += 1
        # Look at the following character with a bounds-checked slice:
        # plain indexing raises IndexError when the pattern ends in a
        # backslash. The explicit "i < n" is required because an empty
        # slice would otherwise count as contained in "escapable".
        if c == backslash and i < n and el[i:i + 1] in escapable:
            # Keep the escape pair as-is; it is already valid regex syntax.
            parts.append(el[i - 1:i + 1])
            i += 1
        elif c == star:
            parts.append(b('.*'))
        elif c == question:
            parts.append(b('.'))
        else:
            parts.append(re.escape(c))
    return _regex(b('').join(parts), l)

def _matchannotation(keyword, matchfunc, el, l):
    """Run matchfunc if el ends with the ' (keyword)' annotation.

    When el does not end with the annotation followed by a newline,
    False is returned. Otherwise matchfunc is called with el stripped of
    the annotation and l stripped of its last character, and its result
    is returned.
    """
    suffix = b(' (%s)\n' % keyword)
    if not el.endswith(suffix):
        return False
    pattern = el[:-len(suffix)]
    actual = l[:-1]
    return matchfunc(pattern, actual)

def regex(el, l):
    """Match l against el when el is annotated with '(re)'.

    The text in front of the annotation is used as a regular expression.
    """
    return _matchannotation(keyword='re', matchfunc=_regex, el=el, l=l)

def glob(el, l):
    """Match l against el when el is annotated with '(glob)'.

    The text in front of the annotation is used as a glob-like pattern.
    """
    return _matchannotation(keyword='glob', matchfunc=_glob, el=el, l=l)

def esc(el, l):
    """Apply an escape match to a line annotated with '(esc)'.

    If el ends with the ' (esc)' annotation, the text before it is
    decoded with codecs.escape_decode() and compared to l. If that does
    not match and l carries the annotation too, l is decoded the same
    way and the two are compared again.

    A line whose escape sequences cannot be decoded (for example a
    trailing backslash) is compared verbatim instead of raising, so a
    malformed annotation shows up as an ordinary mismatch.

    Returns True if the lines are equal after decoding, False otherwise.
    """
    ann = b(' (esc)\n')

    def unescape(line):
        """Decode an annotated line; return other lines unchanged."""
        if not line.endswith(ann):
            return line
        try:
            return codecs.escape_decode(line[:-len(ann)])[0] + b('\n')
        except ValueError:
            # Malformed escape sequence: fall back to the raw text.
            return line

    el = unescape(el)
    if el == l:
        return True
    return el == unescape(l)

class _SequenceMatcher(difflib.SequenceMatcher, object):
    """Like difflib.SequenceMatcher, but supports custom match functions"""
    def __init__(self, *args, **kwargs):
        self._matchers = kwargs.pop('matchers', [])
        super(_SequenceMatcher, self).__init__(*args, **kwargs)

    def _match(self, el, l):
        """Tests for matching lines using custom matchers"""
        for matcher in self._matchers:
            if matcher(el, l):
                return True
        return False

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi]"""
        # SequenceMatcher uses find_longest_match() to slowly whittle down
        # the differences between a and b until it has each matching block.
        # Because of this, we can end up doing the same matches many times.
        matches = []
        for n, (el, line) in enumerate(zip(self.a[alo:ahi], self.b[blo:bhi])):
            if el != line and self._match(el, line):
                # This fools the superclass's method into thinking that the
                # regex/glob in a is identical to b by replacing a's line (the
                # expected output) with b's line (the actual output).
                self.a[alo + n] = line
                matches.append((n, el))
        ret = super(_SequenceMatcher, self).find_longest_match(alo, ahi,
                                                               blo, bhi)
        # Restore the lines replaced above. Otherwise, the diff output
        # would seem to imply that the tests never had any regexes/globs.
        for n, el in matches:
            self.a[alo + n] = el
        return ret

def unified_diff(l1, l2, fromfile=b(''), tofile=b(''), fromfiledate=b(''),
                 tofiledate=b(''), n=3, lineterm=b('\n'), matchers=None):
    r"""Generate a unified diff between two sequences of lines.

    Works like difflib.unified_diff(), except that lines are compared
    with _SequenceMatcher, so the callables in matchers can declare a
    line of l1 equal to a differing line of l2. The two file header
    lines are emitted once, before the first hunk; nothing at all is
    yielded when there are no differences.

    >>> from cram._encoding import b
    >>> l1 = [b('a\n'), b('? (glob)\n')]
    >>> l2 = [b('a\n'), b('b\n')]
    >>> (list(unified_diff(l1, l2, b('f1'), b('f2'), b('1970-01-01'),
    ...                    b('1970-01-02'))) ==
    ...  [b('--- f1\t1970-01-01\n'), b('+++ f2\t1970-01-02\n'),
    ...   b('@@ -1,2 +1,2 @@\n'), b(' a\n'), b('-? (glob)\n'), b('+b\n')])
    True

    >>> from cram._diff import glob
    >>> list(unified_diff(l1, l2, matchers=[glob]))
    []
    """
    if matchers is None:
        matchers = []
    differ = _SequenceMatcher(None, l1, l2, matchers=matchers)
    header_pending = True
    for hunk in differ.get_grouped_opcodes(n):
        if header_pending:
            header_pending = False
            # Dates are optional; when given they follow the name after a tab.
            fromdate = todate = b('')
            if fromfiledate:
                fromdate = b('\t') + fromfiledate
            if tofiledate:
                todate = b('\t') + tofiledate
            yield b('--- ') + fromfile + fromdate + lineterm
            yield b('+++ ') + tofile + todate + lineterm
        # The hunk spans from the start of its first opcode to the end of
        # its last one, on both sides.
        first, last = hunk[0], hunk[-1]
        a_start, a_stop = first[1], last[2]
        b_start, b_stop = first[3], last[4]
        span = (a_start + 1, a_stop - a_start, b_start + 1, b_stop - b_start)
        yield b("@@ -%d,%d +%d,%d @@" % span) + lineterm
        for tag, i1, i2, j1, j2 in hunk:
            if tag == 'equal':
                for line in l1[i1:i2]:
                    yield b(' ') + line
            else:
                if tag in ('replace', 'delete'):
                    for line in l1[i1:i2]:
                        yield b('-') + line
                if tag in ('replace', 'insert'):
                    for line in l2[j1:j2]:
                        yield b('+') + line