File: diff_utils.py

package info (click to toggle)
odoo 18.0.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 878,716 kB
  • sloc: javascript: 927,937; python: 685,670; xml: 388,524; sh: 1,033; sql: 415; makefile: 26
file content (319 lines) | stat: -rw-r--r-- 12,062 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.

import re

from difflib import SequenceMatcher


# ------------------------------------------------------------
# Patch and comparison functions
# ------------------------------------------------------------


OPERATION_SEPARATOR = "\n"
LINE_SEPARATOR = "<"

PATCH_OPERATION_LINE_AT = "@"
PATCH_OPERATION_CONTENT = ":"

PATCH_OPERATION_ADD = "+"
PATCH_OPERATION_REMOVE = "-"
PATCH_OPERATION_REPLACE = "R"

PATCH_OPERATIONS = dict(
    insert=PATCH_OPERATION_ADD,
    delete=PATCH_OPERATION_REMOVE,
    replace=PATCH_OPERATION_REPLACE,
)

HTML_ATTRIBUTES_TO_REMOVE = [
    "data-last-history-steps",
]


def apply_patch(initial_content, patch):
    """Apply a patch (multiple operations) on a content.
    Each operation is a string with the following format:
    <operation_type>@<start_index>[,<end_index>][:<patch_text>*]
    patch format example:
        +@4:<p>ab</p><p>cd</p>
        +@4,15:<p>ef</p><p>gh</p>
        -@32
        -@125,129
        R@523:<b>sdf</b>

    :param string initial_content: the initial content to patch
    :param string patch: the patch to apply

    :return: string: the patched content
    """
    if patch == "":
        return initial_content

    # Replace break line in initial content to ensure they don't interfere with
    # operations
    initial_content = initial_content.replace("\n", "")
    initial_content = _remove_html_attribute(
        initial_content, HTML_ATTRIBUTES_TO_REMOVE
    )

    content = initial_content.split(LINE_SEPARATOR)
    patch_operations = patch.split(OPERATION_SEPARATOR)
    # Apply operations in reverse order to preserve the indexes integrity.
    patch_operations.reverse()

    for operation in patch_operations:
        metadata, *patch_content_line = operation.split(LINE_SEPARATOR)

        metadata_split = metadata.split(PATCH_OPERATION_LINE_AT)
        operation_type = metadata_split[0]
        lines_index_range = metadata_split[1] if len(metadata_split) > 1 else ""
        # We need to remove PATCH_OPERATION_CONTENT char from lines_index_range.
        lines_index_range = lines_index_range.split(PATCH_OPERATION_CONTENT)[0]
        indexes = lines_index_range.split(",")
        start_index = int(indexes[0])
        end_index = int(indexes[1]) if len(indexes) > 1 else start_index

        # We need to insert lines from last to the first
        # to preserve the indexes integrity.
        patch_content_line.reverse()

        if end_index > start_index:
            for index in range(end_index, start_index, -1):
                if operation_type in [
                    PATCH_OPERATION_REMOVE,
                    PATCH_OPERATION_REPLACE,
                ]:
                    del content[index]

        if operation_type in [PATCH_OPERATION_ADD, PATCH_OPERATION_REPLACE]:
            for line in patch_content_line:
                content.insert(start_index + 1, line)
        if operation_type in [PATCH_OPERATION_REMOVE, PATCH_OPERATION_REPLACE]:
            del content[start_index]

    return LINE_SEPARATOR.join(content)


HTML_TAG_ISOLATION_REGEX = r"^([^>]*>)(.*)$"
ADDITION_COMPARISON_REGEX = r"\1<added>\2</added>"
ADDITION_1ST_REPLACE_COMPARISON_REGEX = r"added>\2</added>"
DELETION_COMPARISON_REGEX = r"\1<removed>\2</removed>"
EMPTY_OPERATION_TAG = r"<(added|removed)><\/(added|removed)>"
SAME_TAG_REPLACE_FIXER = r"<\/added><(?:[^\/>]|(?:><))+><removed>"
UNNECESSARY_REPLACE_FIXER = (
    r"<added>([^<](?!<\/added>)*)<\/added>"
    r"<removed>([^<](?!<\/removed>)*)<\/removed>"
)


def generate_comparison(new_content, old_content):
    """Compare a content to an older content
    and generate a comparison html between both content.

    :param string new_content: the current content
    :param string old_content: the old content

    :return: string: the comparison content
    """
    new_content = _remove_html_attribute(new_content, HTML_ATTRIBUTES_TO_REMOVE)
    old_content = _remove_html_attribute(old_content, HTML_ATTRIBUTES_TO_REMOVE)

    if new_content == old_content:
        return new_content

    patch = generate_patch(new_content, old_content)
    comparison = new_content.split(LINE_SEPARATOR)
    patch_operations = patch.split(OPERATION_SEPARATOR)
    # We need to apply operation from last to the first
    # to preserve the indexes integrity.
    patch_operations.reverse()

    for operation in patch_operations:
        metadata, *patch_content_line = operation.split(LINE_SEPARATOR)

        metadata_split = metadata.split(PATCH_OPERATION_LINE_AT)
        operation_type = metadata_split[0]
        lines_index_range = metadata_split[1] if len(metadata_split) > 1 else ""
        lines_index_range = lines_index_range.split(PATCH_OPERATION_CONTENT)[0]
        indexes = lines_index_range.split(",")
        start_index = int(indexes[0])
        end_index = int(indexes[1]) if len(indexes) > 1 else start_index

        # If the operation is a replace, we need to flag the changes that
        # will generate ghost opening tags if we don't ignore
        # them.
        # this can append when:
        # * A change concerning only html parameters.
        #   <p class="x">a</p> => <p class="y">a</p>
        # * An addition in a previously empty element opening tag
        #   <p></p> => <p>a</p>
        if operation_type == PATCH_OPERATION_REPLACE:
            for i, line in enumerate(patch_content_line):
                current_index = start_index + i
                if current_index > end_index:
                    break

                current_line = comparison[current_index]
                current_line_tag = current_line.split(">")[0]
                line_tag = line.split(">")[0]
                if current_line[-1] == ">" and (
                    current_line_tag == line_tag
                    or current_line_tag.split(" ")[0] == line_tag.split(" ")[0]
                ):
                    comparison[start_index + i] = "delete_me>"

        # We need to insert lines from last to the first
        # to preserve the indexes integrity.
        patch_content_line.reverse()

        for index in range(end_index, start_index - 1, -1):
            if operation_type in [
                PATCH_OPERATION_REMOVE,
                PATCH_OPERATION_REPLACE,
            ]:
                deletion_flagged_comparison = re.sub(
                    HTML_TAG_ISOLATION_REGEX,
                    DELETION_COMPARISON_REGEX,
                    comparison[index],
                )
                # Only use this line if it doesn't generate an empty
                # <removed> tag
                if not re.search(
                    EMPTY_OPERATION_TAG, deletion_flagged_comparison
                ):
                    comparison[index] = deletion_flagged_comparison

        if operation_type == PATCH_OPERATION_ADD:
            for line in patch_content_line:
                addition_flagged_line = re.sub(
                    HTML_TAG_ISOLATION_REGEX, ADDITION_COMPARISON_REGEX, line
                )

                if not re.search(EMPTY_OPERATION_TAG, addition_flagged_line):
                    comparison.insert(start_index + 1, addition_flagged_line)
                else:
                    comparison.insert(start_index + 1, line)

        if operation_type == PATCH_OPERATION_REPLACE:
            for i, line in enumerate(patch_content_line):
                addition_flagged_line = re.sub(
                    HTML_TAG_ISOLATION_REGEX, ADDITION_COMPARISON_REGEX, line
                )
                if not re.search(EMPTY_OPERATION_TAG, addition_flagged_line):
                    comparison.insert(start_index, addition_flagged_line)
                elif (
                    line.split(">")[0] != comparison[start_index].split(">")[0]
                ):
                    comparison.insert(start_index, line)

    final_comparison = LINE_SEPARATOR.join(comparison)
    # We can remove all the opening tags which are located between the end of an
    # added tag and the start of a removed tag, because this should never happen
    # as the added and removed tags should always be near each other.
    # This can happen when the new container tag had a parameter change.
    final_comparison = re.sub(
        SAME_TAG_REPLACE_FIXER, "</added><removed>", final_comparison
    )

    # Remove al the <delete_me> tags
    final_comparison = final_comparison.replace(r"<delete_me>", "")

    # This fix the issue of unnecessary replace tags.
    # ex: <added>abc</added><removed>abc</removed> -> abc
    # This can occur when the new content is the same as the old content and
    # their container tags are the same but the tags parameters are different
    for match in re.finditer(UNNECESSARY_REPLACE_FIXER, final_comparison):
        if match.group(1) == match.group(2):
            final_comparison = final_comparison.replace(
                match.group(0), match.group(1)
            )

    return final_comparison


def _format_line_index(start, end):
    """Format the line index to be used in a patch operation.

    :param start: the start index
    :param end: the end index
    :return: string
    """
    length = end - start
    if not length:
        start -= 1
    if length <= 1:
        return "{}{}".format(PATCH_OPERATION_LINE_AT, start)
    return "{}{},{}".format(PATCH_OPERATION_LINE_AT, start, start + length - 1)


def _patch_generator(new_content, old_content):
    """Generate a patch (multiple operations) between two contents.
    Each operation is a string with the following format:
    <operation_type>@<start_index>[,<end_index>][:<patch_text>*]
    patch format example:
        +@4:<p>ab</p><p>cd</p>
        +@4,15:<p>ef</p><p>gh</p>
        -@32
        -@125,129
        R@523:<b>sdf</b>

    :param string new_content: the new content
    :param string old_content: the old content

    :return: string: the patch containing all the operations to reverse
                     the new content to the old content
    """
    # remove break line in contents to ensure they don't interfere with
    # operations
    new_content = new_content.replace("\n", "")
    old_content = old_content.replace("\n", "")

    new_content_lines = new_content.split(LINE_SEPARATOR)
    old_content_lines = old_content.split(LINE_SEPARATOR)

    for group in SequenceMatcher(
        None, new_content_lines, old_content_lines, False
    ).get_grouped_opcodes(0):
        patch_content_line = []
        first, last = group[0], group[-1]
        patch_operation = _format_line_index(first[1], last[2])

        if any(tag in {"replace", "delete"} for tag, _, _, _, _ in group):
            for tag, _, _, _, _ in group:
                if tag not in {"insert", "equal", "replace"}:
                    patch_operation = PATCH_OPERATIONS[tag] + patch_operation

        if any(tag in {"replace", "insert"} for tag, _, _, _, _ in group):
            for tag, _, _, j1, j2 in group:
                if tag not in {"delete", "equal"}:
                    patch_operation = PATCH_OPERATIONS[tag] + patch_operation
                    for line in old_content_lines[j1:j2]:
                        patch_content_line.append(line)

        if patch_content_line:
            patch_content = LINE_SEPARATOR + LINE_SEPARATOR.join(
                patch_content_line
            )
            yield str(patch_operation) + PATCH_OPERATION_CONTENT + patch_content
        else:
            yield str(patch_operation)


def generate_patch(new_content, old_content):
    new_content = _remove_html_attribute(new_content, HTML_ATTRIBUTES_TO_REMOVE)
    old_content = _remove_html_attribute(old_content, HTML_ATTRIBUTES_TO_REMOVE)

    return OPERATION_SEPARATOR.join(
        list(_patch_generator(new_content, old_content))
    )


def _remove_html_attribute(html_content, attributes_to_remove):
    for attribute in attributes_to_remove:
        html_content = re.sub(
            r' {}="[^"]*"'.format(attribute), "", html_content
        )

    return html_content