File: presubmit_caching_support.py

# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import dataclasses
import hashlib
import os
import pickle
from typing import Optional, Any, Dict


@dataclasses.dataclass(frozen=True)
class _PresubmitCheckContext:
  """Describes and identifies a context of a specific presubmit check.

  This is used as a key to cache results of a presubmit check. The histograms
  directory hash is used to lower the probability of changes in one file
  impacing health status of presubmit checks in the other. This still doesn't
  eliminate the risk of changes from outside of this directory affecting the
  health status, but given how PRESUBMIT is triggered, this seems to be inline
  with the risk of that happening because of PRESUBMIT not being triggered.

  Attributes:
    histograms_directory_hash: A sha256 hash of the entire contents of the
      histograms directory (combined hash of all the files) that is used to
      key the cache and invalidate it when the directory content changes.
    check_id: A unique identifier of the check being cached. As a single
      directory's presubmit can contain more than one check, this id is used
      to determine which cache entry belongs to which check.
  """
  histograms_directory_hash: str
  check_id: int

  def key(self):
    return f"{self.histograms_directory_hash}:{self.check_id}"


_CURRENT_CACHE_FILE_SCHEMA_VERSION = 1


@dataclasses.dataclass(frozen=True)
class CacheFileSchema:
  """Describes the schema of the cache file."""
  version: int
  data: Dict[str, Any]


def _CalculateCombinedDirectoryHash(directory_path):
  """Calculates a sha256 hash of the entire contents of a directory."""
  hasher = hashlib.sha256()
  # Sort both directories and files so the traversal order, and therefore the
  # combined hash, is deterministic across runs and filesystems.
  for root, dirs, files in os.walk(directory_path):
    dirs.sort()
    for file in sorted(files):
      file_path = os.path.join(root, file)
      # Read the file in chunks to avoid loading the entire file into memory.
      # The 4 KiB chunk size is an arbitrary multiple of the sha256 block
      # size of 64 bytes.
      with open(file_path, "rb") as f:
        while chunk := f.read(4096):
          hasher.update(chunk)
  return hasher.hexdigest()
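
# Illustrative property (hypothetical path): with the sorted traversal above,
# repeated calls over an unchanged directory return the same digest, which is
# what makes the hash usable as a cache key.
#
#   digest_a = _CalculateCombinedDirectoryHash("histograms")
#   digest_b = _CalculateCombinedDirectoryHash("histograms")
#   assert digest_a == digest_b  # Holds while no file contents change.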


class PresubmitCache:
  """Stores and retrieves results of a presubmit checks for presubmits."""

  _cache_contents: CacheFileSchema
  _storage_file_path: str
  _observed_directory: str

  def __init__(self, storage_file_path: str, observed_directory_path: str):
    self._storage_file_path = storage_file_path
    self._observed_directory = observed_directory_path
    self._cache_contents = CacheFileSchema(
        version=_CURRENT_CACHE_FILE_SCHEMA_VERSION,
        data={},
    )

    if not os.path.exists(self._storage_file_path) or os.path.getsize(
        self._storage_file_path) == 0:
      return

    # Attempt to restore the cache from the file. If that fails, we simply
    # keep the new, empty cache created above. Besides unpickling errors, a
    # truncated file raises EOFError and an object of an unexpected type
    # without a `version` attribute raises AttributeError.
    with open(self._storage_file_path, "rb") as f:
      try:
        loaded_cache = pickle.load(f)
        if loaded_cache.version == _CURRENT_CACHE_FILE_SCHEMA_VERSION:
          self._cache_contents = loaded_cache
      except (pickle.PickleError, EOFError, AttributeError):
        pass

  def _GetForContext(self, context: _PresubmitCheckContext) -> Optional[Any]:
    return self._cache_contents.data.get(context.key())

  def _StoreForContext(self, context: _PresubmitCheckContext,
                       check_result: Any):
    self._cache_contents.data[context.key()] = check_result
    self._SaveCurrentCache()

  def _SaveCurrentCache(self):
    with open(self._storage_file_path, "wb") as f:
      pickle.dump(self._cache_contents, f)

  def InspectCacheForTesting(self) -> CacheFileSchema:
    return self._cache_contents

  def RetrieveResultFromCache(self, check_id: int) -> Optional[Any]:
    return self._GetForContext(
        _PresubmitCheckContext(
            histograms_directory_hash=_CalculateCombinedDirectoryHash(
                self._observed_directory),
            check_id=check_id,
        ))

  def StoreResultInCache(self, check_id: int, check_result: Any):
    self._StoreForContext(
        _PresubmitCheckContext(
            histograms_directory_hash=_CalculateCombinedDirectoryHash(
                self._observed_directory),
            check_id=check_id,
        ), check_result)
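

# Minimal usage sketch (hypothetical storage path and check id, for
# illustration only): store a check result once, then retrieve it for as
# long as the observed directory's contents remain unchanged.
if __name__ == "__main__":
  cache = PresubmitCache(
      storage_file_path="/tmp/presubmit_cache.pickle",
      observed_directory_path=os.path.dirname(os.path.abspath(__file__)))
  if cache.RetrieveResultFromCache(check_id=1) is None:
    cache.StoreResultInCache(check_id=1, check_result="no errors")
  print(cache.RetrieveResultFromCache(check_id=1))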