File: files_changed.py

package info (click to toggle)
firefox 148.0.2-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,719,544 kB
  • sloc: cpp: 7,618,291; javascript: 6,701,749; ansic: 3,781,787; python: 1,418,389; xml: 638,647; asm: 438,962; java: 186,285; sh: 62,894; makefile: 19,011; objc: 13,092; perl: 12,763; yacc: 4,583; cs: 3,846; pascal: 3,448; lex: 1,720; ruby: 1,003; php: 436; lisp: 258; awk: 247; sql: 66; sed: 54; csh: 10; exp: 6
file content (134 lines) | stat: -rw-r--r-- 4,267 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Support for optimizing tasks based on the set of files that have changed.
"""

import logging
import os
from subprocess import CalledProcessError

from mozbuild.util import memoize
from mozpack.path import join as join_path
from mozpack.path import match as mozpackmatch
from mozversioncontrol import InvalidRepoPath, get_repository_object

from gecko_taskgraph import GECKO
from gecko_taskgraph.util.hg import get_json_pushchangedfiles

logger = logging.getLogger(__name__)


@memoize
def get_changed_files(repository, revision):
    """
    Get the set of files changed in the push headed by the given revision.
    Responses are cached, so multiple calls with the same arguments are OK.
    """
    try:
        files = get_json_pushchangedfiles(repository, revision)["files"]
    except KeyError:
        # In CI this data should always be present; surface the problem there.
        if os.environ.get("MOZ_AUTOMATION"):
            raise

        # We're likely on an unpublished commit, so ask version control for
        # the changed files instead.
        files = get_locally_changed_files(GECKO)
    return files


def check(params, file_patterns):
    """Determine whether any of the files changed in the indicated push to
    https://hg.mozilla.org match any of the given file patterns.

    Returns True when a changed file matches one of *file_patterns*, and
    also (conservatively) when the parameters needed to compute the changed
    files are missing.
    """
    repository = params.get("head_repository")
    revision = params.get("head_rev")
    if not repository or not revision:
        logger.warning(
            "Missing `head_repository` or `head_rev` parameters; "
            "assuming all files have changed"
        )
        return True

    # get_changed_files returns a JSON list on the hg path but a set from the
    # local-VCS fallback; normalize to a set so the `|=` union below always
    # works (list |= set raises TypeError).
    changed_files = set(get_changed_files(repository, revision))

    if "comm_head_repository" in params:
        repository = params.get("comm_head_repository")
        revision = params.get("comm_head_rev")
        if not revision:
            logger.warning(
                "Missing `comm_head_rev` parameters; assuming all files have changed"
            )
            return True

        # comm-central files are rooted under comm/ in the gecko checkout.
        changed_files |= {
            join_path("comm", file) for file in get_changed_files(repository, revision)
        }

    return any(
        mozpackmatch(path, pattern)
        for pattern in file_patterns
        for path in changed_files
    )


def _get_locally_changed_files(repo):
    """Return the set of files added/modified ("AM") in outgoing commits of *repo*.

    Falls back to an empty set when *repo* is not a valid repository or the
    underlying VCS command fails.
    """
    try:
        return set(get_repository_object(repo).get_outgoing_files("AM"))
    except (InvalidRepoPath, CalledProcessError):
        return set()


class PreloadedGetLocallyChangedFiles:
    """
    Function-like class that performs eager computation of
    _get_locally_changed_files for what looks like the default repo.

    The rationale is the following:
    - computing _get_locally_changed_files is relatively slow (~600ms)
    - it's already done through an external command

    So we do that in a background thread as soon as possible, so that at the
    point when we need the result, it's already "prefetched".
    """

    def __init__(self):
        # Repo being preloaded (a Path, at most one), the worker thread,
        # and the computed answer once the thread finishes.
        self.preloaded_repo = None
        self.preloading_thread = None
        self.preloaded_answer = None

    def preload(self, repo):
        """
        Fire off preloading of get_locally_changed_files(repo).

        For the sake of simplicity, there can be only one preloaded repo.

        Raises:
            ValueError: if a repo has already been preloaded.
        """
        import threading
        from pathlib import Path

        if self.preloaded_repo is not None:
            raise ValueError("Can only preload one repo")

        self.preloaded_repo = Path(repo)

        def preloading():
            self.preloaded_answer = _get_locally_changed_files(self.preloaded_repo)

        self.preloading_thread = threading.Thread(target=preloading, daemon=True)
        self.preloading_thread.start()

    @memoize
    def __call__(self, repo):
        from pathlib import Path

        # Normalize before comparing: preload() stores a Path, but callers
        # (e.g. get_changed_files) may pass a plain str, and `str == Path`
        # is always False — which would silently discard the preloaded
        # answer and recompute it.
        if self.preloaded_repo is not None and Path(repo) == self.preloaded_repo:
            # A thread can be joined many times, but it's going to happen only
            # once, thanks to @memoize.
            self.preloading_thread.join()
            return self.preloaded_answer
        return _get_locally_changed_files(repo)


get_locally_changed_files = PreloadedGetLocallyChangedFiles()