File: presubmit_diff.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (215 lines) | stat: -rwxr-xr-x 7,043 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/env python3
# Copyright (c) 2024 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Tool for generating a unified git diff outside of a git workspace.

This is intended as a preprocessor for presubmit_support.py.
"""
from __future__ import annotations

import argparse
import base64
import concurrent.futures
import os
import platform
import sys

import gclient_utils
from gerrit_util import (CreateHttpConn, ReadHttpResponse,
                         MAX_CONCURRENT_CONNECTION)
import subprocess2

# Path handed to `git diff --no-index` in place of a missing side of the
# comparison (see git_diff).
DEV_NULL = "/dev/null"
# Marker that _process_diff uses to split a diff into its header (everything
# before the first occurrence) and the remaining body.
HEADER_DELIMITER = "@@"


def fetch_content(host: str, repo: str, ref: str, file: str) -> bytes:
    """Downloads a single file at a given commit from Gitiles.

    The request is sent to `<host>.googlesource.com` with `?format=text`, and
    the response body is base64-decoded before it is returned. Both 200 and
    404 are accepted statuses, so a missing file is not reported as an error
    by this function.

    Args:
      host: Gerrit host (the part before `.googlesource.com`).
      repo: Gerrit repo.
      ref: Gerrit commit.
      file: Path of file to fetch.

    Returns:
        The decoded bytes of the file at the commit. When the file does not
        exist at the commit the result is expected to be an empty bytes
        object (NOTE: this relies on ReadHttpResponse producing an empty
        body for a 404 -- confirm against gerrit_util).
    """
    gitiles_host = f"{host}.googlesource.com"
    gitiles_path = f"{repo}/+show/{ref}/{file}?format=text"
    connection = CreateHttpConn(gitiles_host, gitiles_path)
    encoded_body = ReadHttpResponse(connection,
                                    accept_statuses=[200, 404]).read()
    return base64.b64decode(encoded_body)


def git_diff(src: str | None,
             dest: str | None,
             unified: int | None = None) -> str:
    """Runs `git diff --no-index` on two paths and returns its output.

    A side that is not specified (None or empty) is replaced by /dev/null.
    At least one of src or dest must be specified.

    Args:
      src: Source path.
      dest: Destination path.
      unified: Number of lines of context. If None, no -U flag is passed and
        git diff uses 3 as the default value.

    Returns:
        A string containing the git diff, decoded as UTF-8.
    """
    # The <n> value is deliberately not validated: git diff does not error
    # out on a negative value (e.g. --unified=-3323, -U-3); it just ignores
    # it and treats it as 0.
    context_flag = [] if unified is None else [f"-U{unified}"]
    command = [
        "git",
        "diff",
        "--no-index",
        *context_flag,
        "--",
        src or DEV_NULL,
        dest or DEV_NULL,
    ]
    raw_output = subprocess2.capture(command)
    return raw_output.decode("utf-8")


def _process_diff(diff: str, src_root: str, dst_root: str) -> str:
    """Rewrites the diff header so its paths are relative to the roots.

    Only the text before the first HEADER_DELIMITER is touched; everything
    from the delimiter onwards is returned unchanged. On Windows the header
    is additionally unquoted and un-escaped before the roots are removed.

    Args:
      diff: Output of git diff for a single file. May be empty.
      src_root: Directory prefix to remove for the source side.
      dst_root: Directory prefix to remove for the destination side.

    Returns:
        The diff with both root prefixes removed from its header, or an
        empty string if diff is empty.
    """
    if not diff:
        return ""

    # When there is no chunk header (only the file mode changed), partition
    # yields the whole diff as the preamble and two empty strings.
    preamble, delimiter, hunks = diff.partition(HEADER_DELIMITER)

    src_prefix = src_root.rstrip(os.sep)
    dst_prefix = dst_root.rstrip(os.sep)

    if platform.system() == "Windows":
        # Absolute paths on Windows use the format:
        #   "a/C:\\abspath\\to\\file.txt"
        # so collapse the escaped backslashes and drop the quotes first.
        substitutions = (
            ("\\\\", "\\"),
            ('"', ""),
            (src_prefix + "\\", ""),
            (dst_prefix + "\\", ""),
        )
    else:
        # Other systems use:
        #  a/abspath/to/file.txt
        substitutions = (
            (src_prefix, ""),
            (dst_prefix, ""),
        )

    # Order matters: each replacement runs on the result of the previous one.
    for old, new in substitutions:
        preamble = preamble.replace(old, new)

    return preamble + delimiter + hunks


def _create_diff(host: str, repo: str, ref: str, root: str, file: str,
                 unified: int | None) -> str:
    """Diffs one local file against its version at a Gerrit commit.

    The old version is fetched with fetch_content and written into a
    temporary directory; the new version is read from root. A side that does
    not exist is diffed against /dev/null by git_diff.

    Args:
      host: Gerrit host.
      repo: Gerrit repo.
      ref: Gerrit commit.
      root: Path of local directory containing modified files.
      file: Path of the file, relative to root.
      unified: Number of lines of context, or None for git's default.

    Returns:
        The git diff for the file with header paths made relative to the
        roots (see _process_diff).

    Raises:
        RuntimeError: If the file is missing in both the root and the repo.
    """
    # _process_diff strips the root from the diff header with a plain string
    # replace, so a relative root such as "." or "src" would also delete
    # unrelated occurrences of that text (e.g. every "." in the header).
    # Anchoring the root to an absolute path keeps the match specific.
    abs_root = os.path.abspath(root)

    new_file = os.path.join(abs_root, file)
    if not os.path.exists(new_file):
        new_file = None

    with gclient_utils.temporary_directory() as tmp_root:
        old_file = None
        old_content = fetch_content(host, repo, ref, file)
        if old_content:
            # Mirror the file's relative path inside the temporary directory
            # so both sides of the diff share the same path below their root.
            old_file = os.path.join(tmp_root, file)
            os.makedirs(os.path.dirname(old_file), exist_ok=True)
            with open(old_file, "wb") as f:
                f.write(old_content)

        if not old_file and not new_file:
            raise RuntimeError(f"Could not access file {file} from {root} "
                               f"or from {host}/{repo}:{ref}.")

        diff = git_diff(old_file, new_file, unified)
        return _process_diff(diff, tmp_root, abs_root)


def create_diffs(host: str,
                 repo: str,
                 ref: str,
                 root: str,
                 files: list[str],
                 unified: int | None = None) -> dict[str, str]:
    """Calculates diffs of files in a directory against a commit.

    The per-file diffs are computed concurrently on a thread pool of at most
    MAX_CONCURRENT_CONNECTION workers.

    Args:
      host: Gerrit host.
      repo: Gerrit repo.
      ref: Gerrit commit.
      root: Path of local directory containing modified files.
      files: List of file paths relative to root.
      unified: Number of lines of context. If None, git diff uses 3 as
        the default value.

    Returns:
        A dict mapping file paths to diffs. Keys appear in the same order as
        in files, so the result is deterministic regardless of which worker
        finishes first.

    Raises:
        RuntimeError: If a file is missing in both the root and the repo.
    """
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=MAX_CONCURRENT_CONNECTION) as executor:
        # Submit everything up front so the work still runs in parallel; the
        # list is fully built before any result is awaited.
        pending = [(file,
                    executor.submit(_create_diff, host, repo, ref, root, file,
                                    unified)) for file in files]
        # Collect in submission order rather than completion order, so that
        # callers joining the values get a stable output.
        return {file: future.result() for file, future in pending}


def main(argv):
    """Command-line entry point: diffs local files against a Gerrit commit.

    Parses the arguments, computes one diff per file with create_diffs, joins
    the non-empty diffs with newlines and either writes the result to the
    --output file or prints it to stdout.

    Args:
      argv: Command-line arguments, without the program name.

    Returns:
        0 on success. Argument errors exit through argparse (SystemExit).
    """
    parser = argparse.ArgumentParser(
        usage="%(prog)s [options] <files...>",
        description="Makes a unified git diff against a Gerrit commit.",
    )
    parser.add_argument("--output", help="File to write the diff to.")
    parser.add_argument("--host", required=True, help="Gerrit host.")
    parser.add_argument("--repo", required=True, help="Gerrit repo.")
    parser.add_argument("--ref",
                        required=True,
                        help="Gerrit ref to diff against.")
    parser.add_argument("--root",
                        required=True,
                        help="Folder containing modified files.")
    parser.add_argument("-U",
                        "--unified",
                        required=False,
                        type=int,
                        help="generate diffs with <n> lines context",
                        metavar='<n>')
    parser.add_argument(
        "files",
        nargs="+",
        help="List of changed files. Paths are relative to the repo root.",
    )
    options = parser.parse_args(argv)

    diffs = create_diffs(options.host, options.repo, options.ref, options.root,
                         options.files, options.unified)

    # Files whose diff is empty (no changes) are left out of the output.
    unified_diff = "\n".join(d for d in diffs.values() if d)
    if options.output:
        # The diff text was decoded from UTF-8, so write it back as UTF-8
        # instead of the platform default encoding, which may be unable to
        # represent some characters.
        with open(options.output, "w", encoding="utf-8") as f:
            f.write(unified_diff)
    else:
        print(unified_diff)

    return 0


# When run as a script, pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))