1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import dataclasses
import hashlib
import os
import pickle
from typing import Optional, Any, Dict
@dataclasses.dataclass(frozen=True)
class _PresubmitCheckContext:
"""Describes and identifies a context of a specific presubmit check.
This is used as a key to cache results of a presubmit check. The histograms
directory hash is used to lower the probability of changes in one file
impacing health status of presubmit checks in the other. This still doesn't
eliminate the risk of changes from outside of this directory affecting the
health status, but given how PRESUBMIT is triggered, this seems to be inline
with the risk of that happening because of PRESUBMIT not being triggered.
Attributes:
histograms_directory_hash: A sha256 hash of the entire contents of the
histograms directory (combined hash of all the files) that is used to
key the cache and invalidate it when the directory content changes.
check_id: A unique identifier of the check that is being cached. As the
single directory presubmit can have more then one check in it, this id
is used to determine which cache content should be used for which check.
"""
histograms_directory_hash: str
check_id: int
def key(self):
return f"{self.histograms_directory_hash}:{self.check_id}"
_CURRENT_CACHE_FILE_SCHEMA_VERSION = 1
@dataclasses.dataclass(frozen=True)
class CacheFileSchema:
"""Describes the schema of the cache file."""
version: int
data: Dict[str, Any]
def _CalculateCombinedDirectoryHash(directory_path):
"""Calculates a sha256 hash of the entire contents of a directory."""
hasher = hashlib.sha256()
for root, _, files in os.walk(directory_path):
for file in sorted(files):
file_path = os.path.join(root, file)
# Read the file in chunks to avoid loading the entire file into memory.
# Chunk of 4kb is arbitrary multiple of sha256 block size of 64b.
with open(file_path, "rb") as f:
chunk = f.read(4096)
while chunk:
hasher.update(chunk)
chunk = f.read(4096)
return hasher.hexdigest()
class PresubmitCache:
"""Stores and retrieves results of a presubmit checks for presubmits."""
_cache_contents: CacheFileSchema
_storage_file_path: str
_observed_directory: str
def __init__(self, storage_file_path: str, observed_directory_path: str):
self._storage_file_path = storage_file_path
self._observed_directory = observed_directory_path
self._cache_contents = CacheFileSchema(
version=_CURRENT_CACHE_FILE_SCHEMA_VERSION,
data={},
)
if not os.path.exists(self._storage_file_path) or os.path.getsize(
self._storage_file_path) == 0:
return
# Attempt to restore the cache from the file. If it fails we will just
# create a new, empty cache.
with open(self._storage_file_path, "rb") as f:
try:
loaded_cache = pickle.load(f)
if loaded_cache.version == _CURRENT_CACHE_FILE_SCHEMA_VERSION:
self._cache_contents = loaded_cache
except pickle.PickleError:
pass
def _GetForContext(self, context: _PresubmitCheckContext) -> Optional[str]:
if context.key() not in self._cache_contents.data:
return None
return self._cache_contents.data[context.key()]
def _StoreForContext(self, context: _PresubmitCheckContext,
check_result: Any):
self._cache_contents.data[context.key()] = check_result
self._SaveCurrentCache()
def _SaveCurrentCache(self):
with open(self._storage_file_path, "wb") as f:
pickle.dump(self._cache_contents, f)
def InspectCacheForTesting(self) -> CacheFileSchema:
return self._cache_contents
def RetrieveResultFromCache(self, check_id: int) -> Optional[Any]:
return self._GetForContext(
_PresubmitCheckContext(
histograms_directory_hash=_CalculateCombinedDirectoryHash(
self._observed_directory),
check_id=check_id,
))
def StoreResultInCache(self, check_id: int, check_result: Any):
self._StoreForContext(
_PresubmitCheckContext(
histograms_directory_hash=_CalculateCombinedDirectoryHash(
self._observed_directory),
check_id=check_id,
), check_result)
|