1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
|
# Run as python3 scripts/validation.py from the repository root directory.
# Ensure dependencies from requirements-val.txt are installed.
from argparse import ArgumentParser
from collections import Counter, namedtuple
from io import BytesIO
from itertools import chain
import os
from pathlib import Path
import re
import sys
import typing as t
import codecs
import common
import numpy as np
from scipy.stats import f
from austin.format.mojo import (
MojoFile,
MojoStack,
MojoFrameReference,
MojoMetric,
MojoFrame,
)
from test.utils import python, target
# A validation scenario: a human-readable title, the name of the Austin
# variant to run, and the tuple of command-line arguments handed to it.
Scenario = namedtuple("Scenario", "title variant args")

# Command prefix used to launch the profiled target scripts. When the
# AUSTIN_TESTS_PYTHON_VERSIONS environment variable is set, its value is
# handed to the test helper `python`; otherwise the interpreter running this
# script is used.
# NOTE(review): `python(...)` is unpacked into the argument tuples below, so
# it presumably returns a sequence of command tokens -- confirm in test.utils.
if "AUSTIN_TESTS_PYTHON_VERSIONS" in os.environ:
    PYTHON = python(os.getenv("AUSTIN_TESTS_PYTHON_VERSIONS"))
else:
    PYTHON = [sys.executable]

# Every scenario runs the "austin" variant with the value "500" following the
# flags and differs only in its title, its flags and the target script that
# is profiled.
SCENARIOS = [
    Scenario(title, "austin", (flags, "500", *PYTHON, target(script)))
    for title, flags, script in (
        ("Wall time", "-i", "target34.py"),
        ("CPU time", "-si", "target34.py"),
        ("Wall time [multiprocessing]", "-Ci", "target_mp.py"),
        ("CPU time [multiprocessing]", "-Csi", "target_mp.py"),
    )
]
class AustinFlameGraph(dict):
    """Sparse flame graph: a mapping from stack identifiers to metric values.

    A stack that is not present in the mapping is treated as having metric 0.
    The arithmetic operators return new instances and never mutate their
    operands; entries whose resulting value is falsy (e.g. 0) are removed so
    that the set of keys is always the support of the graph.
    """

    def __call__(self, x):
        """Return the metric stored for stack ``x``, or 0 if it is absent."""
        return self.get(x, 0)

    def __add__(self, other):
        """Return the key-wise sum of this graph and the mapping ``other``."""
        result = self.__class__(self)
        for key, value in other.items():
            # ``value.__class__()`` is the zero of the value's own type, so
            # keys missing on the left-hand side start from a neutral element.
            total = result.setdefault(key, value.__class__()) + value
            if not total:
                # Keep the representation sparse: drop cancelled entries.
                del result[key]
                continue
            result[key] = total
        return result

    def __mul__(self, other):
        """Return this graph with every metric multiplied by ``other``."""
        return self.__class__(
            {key: scaled for key, value in self.items() if (scaled := value * other)}
        )

    def __rmul__(self, other):
        """Support ``scalar * graph``; same result as ``graph * scalar``."""
        return self.__mul__(other)

    def __truediv__(self, other):
        """Return this graph with every metric divided by ``other``."""
        return self * (1 / other)

    def __rtruediv__(self, other):
        """Support ``scalar / graph`` by delegating to ``graph / scalar``.

        The previous implementation called ``__div__``, which does not exist
        on Python 3, so this operator always raised ``AttributeError``.

        NOTE(review): like ``__rmul__`` this simply mirrors the forward
        operator, so ``scalar / graph`` equals ``graph / scalar``; it is not
        an element-wise reciprocal.
        """
        return self.__truediv__(other)

    def __sub__(self, other):
        """Return the key-wise difference; ``other`` must support negation."""
        return self + (-other)

    def __neg__(self):
        """Return a copy of this graph with every metric negated."""
        return self.__class__({key: -value for key, value in self.items()})

    def supp(self):
        """Return the support of the graph, i.e. the set of its keys."""
        return set(self.keys())

    def to_list(self, domain: list) -> list:
        """Return the metrics for the stacks in ``domain``, in that order.

        Stacks that are not part of this graph contribute 0.
        """
        return [self(v) for v in domain]

    @classmethod
    def from_list(cls, stacks: t.List[t.Tuple[str, int]]) -> "AustinFlameGraph":
        """Build a graph from ``(stack, metric)`` pairs.

        Metrics of repeated stacks are summed; pairs whose accumulated metric
        is 0 do not appear in the result.
        """
        return sum((cls({stack: metric}) for stack, metric in stacks), cls())

    @classmethod
    def from_mojo(cls, data: bytes) -> "AustinFlameGraph":
        """Build a graph from the bytes of a MOJO profile.

        Each ``MojoStack`` event is treated as the start of a new sample. The
        frames referenced after it are serialized and joined with ``;`` to
        form the stack key, and the value of the last ``MojoMetric`` event
        seen for the sample is accumulated under that key.
        """
        fg = cls()
        stack: t.List[str] = []
        metric = 0

        def serialize(frame: MojoFrame) -> str:
            # Identify a frame by file, scope and its full source span.
            return ":".join(
                (
                    frame.filename.string.value,
                    frame.scope.string.value,
                    str(frame.line),
                    str(frame.line_end),
                    str(frame.column),
                    str(frame.column_end),
                )
            )

        for e in MojoFile(BytesIO(data)).parse():
            if isinstance(e, MojoStack):
                # A new sample begins: flush the one collected so far.
                if stack:
                    fg += cls({";".join(stack): metric})
                stack.clear()
                metric = 0
            elif isinstance(e, MojoFrameReference):
                stack.append(serialize(e.frame))
            elif isinstance(e, MojoMetric):
                metric = e.value

        # The final sample is not followed by another MojoStack event, so it
        # has to be flushed explicitly or it would be silently dropped.
        if stack:
            fg += cls({";".join(stack): metric})

        return fg
def hotelling_two_sample_test(X, Y) -> float:
    """Return the p-value of the two-sample Hotelling T^2 test.

    The test compares the mean vectors of the two samples using the pooled
    sample covariance. The T^2 statistic is rescaled to an F statistic with
    ``(p, nx + ny - p - 1)`` degrees of freedom, and the returned value is
    the upper-tail probability of that F distribution.

    Args:
        X: 2-D array of shape ``(nx, p)``, one observation per row.
        Y: 2-D array of shape ``(ny, p)``, one observation per row.

    Returns:
        The p-value as a float in ``[0, 1]``.

    Raises:
        AssertionError: if the samples differ in dimensionality or there are
            too few observations (``nx + ny - p - 1 <= 0``).
        numpy.linalg.LinAlgError: if the pooled covariance matrix is singular.
    """
    nx, p = X.shape
    ny, q = Y.shape

    assert p == q, "X and Y must have the same dimensionality"

    dof = nx + ny - p - 1

    assert (
        dof > 0
    ), f"X ({nx}x{p}) and Y ({ny}x{q}) must have at least p ({p}) + 1 samples"

    # Factor that turns the quadratic form below into an F statistic.
    g = dof / p / (nx + ny - 2) * (nx * ny) / (nx + ny)

    delta = np.mean(X, axis=0) - np.mean(Y, axis=0)

    # np.cov collapses to a 0-d array when there is a single variable; force
    # a (p, p) matrix so the linear solve below also works for p == 1.
    x_cov = np.atleast_2d(np.cov(X, rowvar=False))
    y_cov = np.atleast_2d(np.cov(Y, rowvar=False))
    pooled_cov = ((nx - 1) * x_cov + (ny - 1) * y_cov) / (nx + ny - 2)

    # Compute the F statistic from the Hotelling T^2 statistic. Solving the
    # linear system avoids forming the explicit inverse of the covariance.
    statistic = g * (delta @ np.linalg.solve(pooled_cov, delta))

    # The survival function is 1 - cdf, but keeps precision for tiny p-values.
    return float(f(p, dof).sf(statistic))
def compare(
    x: t.List[AustinFlameGraph],
    y: t.List[AustinFlameGraph],
    threshold: t.Optional[float] = None,
) -> float:
    """Compare two collections of flame graphs with a Hotelling T^2 test.

    Every flame graph is turned into a vector of metrics over a common domain
    of stacks, and the two resulting samples are handed to
    ``hotelling_two_sample_test``.

    Args:
        x: flame graphs of the first sample.
        y: flame graphs of the second sample.
        threshold: when given, only stacks that occur in at least this many
            of the flame graphs of ``x`` and ``y`` taken together are used;
            otherwise every stack that occurs anywhere is used.

    Returns:
        The p-value returned by ``hotelling_two_sample_test``.
    """
    if threshold is None:
        domain = list(set().union(*(fg.supp() for fg in chain(x, y))))
    else:
        # Count in how many flame graphs (across both samples) each stack
        # occurs and keep only the sufficiently common ones.
        occurrences = Counter()
        for fg in chain(x, y):
            occurrences.update(fg.supp())
        domain = sorted(k for k, v in occurrences.items() if v >= threshold)

    # 64-bit integers: metric values can exceed the int32 range.
    X = np.array([fg.to_list(domain) for fg in x], dtype=np.int64)
    Y = np.array([fg.to_list(domain) for fg in y], dtype=np.int64)

    return hotelling_two_sample_test(X, Y)
def validate(args, variant: str = "austin", runs: int = 10) -> float:
    """Compare profiles from the latest and the development Austin builds.

    Both builds are invoked ``runs`` times with ``args``; each run's standard
    output is parsed as MOJO data into an ``AustinFlameGraph``, and the two
    resulting samples are passed to ``compare``.

    Args:
        args: command-line arguments forwarded to each Austin invocation.
        variant: name of the Austin variant, forwarded to the ``common``
            helpers that provide the two builds.
        runs: number of profiles collected per build.

    Returns:
        The value returned by ``compare`` for the two samples.
    """
    # NOTE(review): both helpers are used as callables below; presumably the
    # first downloads the latest release into /tmp and the second wraps the
    # local development build -- confirm in common.
    austin_latest = common.download_latest(dest=Path("/tmp"), variant_name=variant)
    austin_dev = common.get_dev(variant_name=variant)

    def collect(austin) -> t.List[AustinFlameGraph]:
        # Run the given build `runs` times, parsing each MOJO output.
        profiles = []
        for _ in range(runs):
            result = austin(*args, mojo=True, convert=False)
            profiles.append(AustinFlameGraph.from_mojo(result.stdout))
        return profiles

    latest_profiles = collect(austin_latest)
    dev_profiles = collect(austin_dev)

    # With threshold=runs a stack is kept only if it occurs in at least
    # `runs` of the 2 * `runs` profiles collected across both builds.
    return compare(latest_profiles, dev_profiles, threshold=runs)
if __name__ == "__main__":
    # Command-line entry point: validate the selected scenarios and exit with
    # status 1 if any of them yields a p-value below the chosen threshold.
    argp = ArgumentParser()

    argp.add_argument(
        "-k",
        type=re.compile,
        help="Run data validation scenarios that match the given regular expression",
    )

    argp.add_argument(
        "-n",
        type=int,
        default=30,
        help="Number of profiles to collect",
    )

    argp.add_argument(
        "-i",
        "--ignore-errors",
        action="store_true",
        help="Ignore encoding errors",
    )

    argp.add_argument(
        "-p",
        "--p-value",
        type=float,
        default=0.01,
        help="p-value threshold",
    )

    opts = argp.parse_args()

    if opts.ignore_errors:
        # Replace the "strict" error handler so that encoding/decoding errors
        # are ignored instead of raising.
        codecs.register_error("strict", codecs.ignore_errors)

    # Honour -k: keep only the scenarios whose title matches the pattern
    # (all of them when no pattern is given).
    selected = [
        s for s in SCENARIOS if opts.k is None or opts.k.search(s.title)
    ]

    print("# Austin Data Validation\n")

    # Scenarios that failed validation, paired with their p-value.
    failures: t.List[t.Tuple[Scenario, float]] = []

    for scenario in selected:
        # Progress goes to stderr and is overwritten in place.
        print(
            f"Validating {scenario.title} ... ",
            end="\r",
            flush=True,
            file=sys.stderr,
        )

        if (p := validate(scenario.args, scenario.variant, runs=opts.n)) < opts.p_value:
            failures.append((scenario, p))

    if failures:
        print("💥 The following scenarios failed to validate:\n")

        for scenario, p in failures:
            print(f"- {scenario.title} [{p:.2%}]")

        sys.exit(1)

    print(f"✨ 🍰 ✨ All {len(selected)} scenarios validated successfully!")
|