File: patch.py

package info (click to toggle)
python-moreorless 0.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 184 kB
  • sloc: python: 403; makefile: 40; sh: 11
file content (179 lines) | stat: -rw-r--r-- 6,049 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import logging
import re
from dataclasses import dataclass, field
from typing import List, Optional, Sequence

# Names exported by `from <this module> import *`.
__all__ = ["apply_single_file", "PatchException"]

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


def apply_single_file(contents: str, patch: str, allow_offsets: bool = True) -> str:
    """
    Return `contents` with the unified diff `patch` applied.

    The patch must apply cleanly: no fuzz, no rejects.  The first two lines
    of `patch` are dropped unconditionally as the file header, and the rest
    is parsed as hunks.  `allow_offsets` is passed through to the hunk
    application step.
    """
    # Keep line endings so the pieces can be re-joined verbatim.
    original_lines = contents.splitlines(True)
    patch_lines = patch.splitlines(True)

    # Skip the two header lines; only hunk lines remain.
    parsed_hunks = _split_hunks(patch_lines[2:])
    patched_lines = _apply_hunks(original_lines, parsed_hunks, allow_offsets)
    return "".join(patched_lines)


# Matches a unified-diff hunk header such as `@@ -1,3 +1,4 @@`.
# Groups: (1) old start, (2) old length, (3) new start, (4) new length.
# The two length groups are optional and are None when the `,N` part is
# absent.  The pattern is not anchored at the end, so text following the
# closing `@@` is accepted.
POSITION_LINE_RE = re.compile(r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")


class PatchException(Exception):
    """Raised when a patch cannot be parsed or applied."""


class ContextException(PatchException):
    """Raised when the arguments to a context search are inconsistent."""


def _parse_position_line(position_line: str) -> List[int]:
    """
    Parse an `@@` line into `[old_start, old_len, new_start, new_len]`.

    A length that is omitted from the header defaults to 1.  Raises
    PatchException if the line does not match the hunk header pattern.
    """
    found = POSITION_LINE_RE.match(position_line)
    if found is None:
        raise PatchException(f"Position line {position_line!r} failed to parse")
    # Optional length groups that did not participate come back as "1".
    return [int(number) for number in found.groups("1")]


# TODO store the offsets too, to make filtering easier
@dataclass
class Hunk:
    """
    One hunk of a unified diff.

    `position` holds the numbers parsed from the hunk's `@@` line, or None
    when it has not been set.  `lines` holds the raw lines of the hunk.
    """

    # Each instance gets its own fresh list for `lines`.
    position: Optional[List[int]] = field(default=None)
    lines: List[str] = field(default_factory=list)


def _split_hunks(diff_lines: Sequence[str]) -> List[Hunk]:
    """
    Group unified diff lines (file header already removed) into hunks.

    Every line starting with `@@` opens a new hunk; that header line and all
    lines up to the next header are stored in the hunk's `lines`.  Raises
    PatchException if a non-header line appears before the first header.
    """
    collected: List[Hunk] = []
    current: Optional[Hunk] = None

    for raw in diff_lines:
        if raw.startswith("@@"):
            # A header closes the hunk in progress (if any) and opens the next.
            if current is not None:
                collected.append(current)
            current = Hunk(_parse_position_line(raw))
        elif current is None:
            # The '---' / '+++' lines are expected to have been stripped by
            # apply_single_file, so anything before the first '@@' is an error.
            raise PatchException(f"Lines without hunk header at {raw!r}")
        current.lines.append(raw)

    # Flush the final hunk.
    if current is not None and current.lines:
        collected.append(current)

    return collected


def _apply_hunks(lines: List[str], hunks: List[Hunk], allow_offsets: bool) -> List[str]:
    """
    Apply `hunks`, in order, to a copy of `lines` and return the result.

    `lines` itself is not modified.  The first entry of each hunk's `lines`
    is its `@@` header and is skipped; the remaining entries are applied as
    deletions (`-`), insertions (`+`) and context checks (` `).

    When `allow_offsets` is true, each hunk is first relocated with
    `_context_match` to the closest position, at or after where the previous
    hunk finished, at which its context and deleted lines match.  When it is
    false, each hunk must apply exactly where its header says.

    Raises PatchException when a hunk cannot be located, when a deleted or
    context line does not match the file (including when the hunk points
    outside the file), or when a hunk line has an unknown prefix.
    """
    work = lines[:]
    file_offset = 0  # accumulation of delta
    prev_line = 0
    for hunk in hunks:
        # `position` is Optional on Hunk; narrow it before use.
        assert hunk.position is not None
        pos = hunk.position[:]

        # If length is zero, this is a no-context deletion and per
        # https://www.artima.com/weblogs/viewpost.jsp?thread=164293 the numbers
        # are off by one from being actual line numbers. :/
        # (pos[2] is adjusted here but is not read again below.)
        if pos[3] == 0:
            pos[2] += 1
        if pos[1] == 0:
            pos[0] += 1

        # Zero-based index in `work` where the header says this hunk starts,
        # shifted by the net growth/shrinkage of the hunks already applied.
        cur_line = pos[0] + file_offset - 1
        # Meld "No newline at end of file" up a line
        tmp = hunk.lines[:]
        # Walk backwards so deleting an entry does not disturb the indices
        # still to be visited; index 0 (the header) is never examined.
        for i in range(len(tmp) - 1, 0, -1):
            if tmp[i].startswith("\\ No newline"):
                del tmp[i]
                # strips newline (including dos newlines, although we don't
                # produce those in moreorless.unified_diff)
                if tmp[i - 1].endswith("\r\n"):
                    tmp[i - 1] = tmp[i - 1][:-2]
                else:
                    tmp[i - 1] = tmp[i - 1][:-1]
        if allow_offsets:
            # The lines this hunk expects to find in the file: context and
            # deletions, with their one-character prefix removed.
            tmp2 = [t[1:] for t in tmp if t[0] in (" ", "-")]
            # TODO if hunks overlap, this checks against the already-modified
            # one for context, which seems wrong.  Unmodified file is something
            # like _context_match(lines, tmp2, ..., prev_line+file_offset)-file_offset

            # On a proper patch this always takes in cur_line and returns cur_line
            new_line = _context_match(work, tmp2, prev_line, len(work), cur_line)
            if new_line is None:
                raise PatchException(f"Failed to apply with offset at {cur_line}")
            if cur_line != new_line:
                LOG.info("Offset %s", new_line - cur_line)
                cur_line = new_line

        for line in tmp[1:]:
            if line.startswith("-"):
                # Bounds-check first: without offset matching, a hunk may
                # point outside the file, and that must surface as a
                # PatchException rather than an IndexError (or a silent
                # wrap-around for a negative index).
                if not (0 <= cur_line < len(work)) or line[1:] != work[cur_line]:
                    raise PatchException(f"DELETE fail at {cur_line}")
                del work[cur_line]
            elif line.startswith("+"):
                work.insert(cur_line, line[1:])
                cur_line += 1
            elif line.startswith(" "):
                if not (0 <= cur_line < len(work)) or line[1:] != work[cur_line]:
                    raise PatchException(f"EQUAL fail at {cur_line}")
                cur_line += 1
            elif line.startswith("?"):  # pragma: no cover
                pass  # human readable line
            else:
                raise PatchException(f"Unknown line {line!r} at {cur_line}")
        # Net change in file length caused by this hunk (new length - old).
        file_offset += pos[3] - pos[1]
        # The next hunk may not be matched before the end of this one.
        prev_line = cur_line

    return work


def _context_match(
    file_lines: List[str],
    context_lines: List[str],
    range_start: int,
    range_end: int,
    start: int,
) -> Optional[int]:
    """
    Locate `context_lines` within `file_lines`, as close to `start` as possible.

    Only indices i with `range_start <= i <= range_end - len(context_lines)`
    are considered.  Candidates are tried at increasing distance from
    `start`; at equal distance the lower index wins.  Returns the matching
    index, or None if no candidate matches.

    Raises ContextException when the arguments are inconsistent: a negative
    `range_start`, `range_end` before `range_start` or past the end of
    `file_lines`, or `start` outside the searchable range.
    """
    width = len(context_lines)

    if range_start < 0:
        raise ContextException("context error 1: negative range_start")
    if range_end < range_start:
        raise ContextException("context error 2: flipped range")
    if range_end > len(file_lines):
        raise ContextException("context error 3: past end")
    if start < range_start:
        raise ContextException("context error 4: start before range_start")
    if start > range_end - width:
        raise ContextException("context error 5: start past range_end")

    def matches_at(index: int) -> bool:
        # True when every context line equals the file line at the same
        # relative position, starting from `index`.
        return all(
            wanted == file_lines[index + k] for k, wanted in enumerate(context_lines)
        )

    # Number of distances needed to reach each end of the range from `start`.
    steps_down = start - range_start + 1
    steps_up = range_end - start - width + 1

    for distance in range(max(steps_down, steps_up)):
        # Try the lower candidate first so ties resolve to the smaller index.
        below = start - distance
        if below >= range_start and matches_at(below):
            return below
        above = start + distance
        if above + width <= range_end and matches_at(above):
            return above
    return None