1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
|
//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the json file produced by the `automemcpy` binary.
//
// As a remainder, `automemcpy` will benchmark each autogenerated memory
// functions against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more json files.
// - If there are several runs for the same function and distribution, picks the
// median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distributions and scores them from worst (0)
// to best (1).
// - Each distribution categorizes each function into one of the following
// categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
// BAD.
// - A process similar to the Majority Judgment voting system is used to `elect`
// the best function. The histogram of grades is returned so we can
// distinguish between functions with the same final grade. In the following
// example both functions grade EXCELLENT but we may prefer the second one.
//
// | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
// |------------|-----------|-----------|------|----------| ...
// | Function_1 | 7 | 1 | 2 | | ...
// | Function_2 | 6 | 4 | | | ...
#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include <numeric>
#include <unordered_map>
namespace llvm {
namespace automemcpy {
StringRef Grade::getString(const GradeEnum &GE) {
switch (GE) {
case EXCELLENT:
return "EXCELLENT";
case VERY_GOOD:
return "VERY_GOOD";
case GOOD:
return "GOOD";
case PASSABLE:
return "PASSABLE";
case INADEQUATE:
return "INADEQUATE";
case MEDIOCRE:
return "MEDIOCRE";
case BAD:
return "BAD";
case ARRAY_SIZE:
report_fatal_error("logic error");
}
}
Grade::GradeEnum Grade::judge(double Score) {
if (Score >= 6. / 7)
return EXCELLENT;
if (Score >= 5. / 7)
return VERY_GOOD;
if (Score >= 4. / 7)
return GOOD;
if (Score >= 3. / 7)
return PASSABLE;
if (Score >= 2. / 7)
return INADEQUATE;
if (Score >= 1. / 7)
return MEDIOCRE;
return BAD;
}
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
const double SampleMean) {
assert(!Samples.empty());
if (Samples.size() == 1)
return 0;
double DiffSquaresSum = 0;
for (const double S : Samples) {
const double Diff = S - SampleMean;
DiffSquaresSum += Diff * Diff;
}
return DiffSquaresSum / (Samples.size() - 1);
}
static void processPerDistributionData(PerDistributionData &Data) {
auto &Samples = Data.BytesPerSecondSamples;
assert(!Samples.empty());
// Sample Mean
const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
Data.BytesPerSecondMean = Sum / Samples.size();
// Unbiased Sample Variance
Data.BytesPerSecondVariance =
computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
// Median
const size_t HalfSize = Samples.size() / 2;
std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
Data.BytesPerSecondMedian = Samples[HalfSize];
}
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
for (const auto &S : Samples) {
if (S.Type != SampleType::ITERATION)
break;
auto &Function = Functions[S.Id.Function];
auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
}
std::vector<FunctionData> Output;
for (auto &[FunctionId, Function] : Functions) {
Function.Id = FunctionId;
for (auto &Pair : Function.PerDistributionData)
processPerDistributionData(Pair.second);
Output.push_back(std::move(Function));
}
return Output;
}
void fillScores(MutableArrayRef<FunctionData> Functions) {
// A key to bucket throughput per function type and distribution.
struct Key {
FunctionType Type;
StringRef Distribution;
COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
};
// Tracks minimum and maximum values.
struct MinMax {
double Min = std::numeric_limits<double>::max();
double Max = std::numeric_limits<double>::min();
void update(double Value) {
if (Value < Min)
Min = Value;
if (Value > Max)
Max = Value;
}
double normalize(double Value) const { return (Value - Min) / (Max - Min); }
};
std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
for (const auto &Function : Functions) {
const FunctionType Type = Function.Id.Type;
for (const auto &Pair : Function.PerDistributionData) {
const auto &Distribution = Pair.getKey();
const double Throughput = Pair.getValue().BytesPerSecondMedian;
const Key K{Type, Distribution};
ThroughputMinMax[K].update(Throughput);
}
}
for (auto &Function : Functions) {
const FunctionType Type = Function.Id.Type;
for (const auto &Pair : Function.PerDistributionData) {
const auto &Distribution = Pair.getKey();
const double Throughput = Pair.getValue().BytesPerSecondMedian;
const Key K{Type, Distribution};
Function.PerDistributionData[Distribution].Score =
ThroughputMinMax[K].normalize(Throughput);
}
}
}
void castVotes(MutableArrayRef<FunctionData> Functions) {
for (FunctionData &Function : Functions) {
Function.ScoresGeoMean = 1.0;
for (const auto &Pair : Function.PerDistributionData) {
const StringRef Distribution = Pair.getKey();
const double Score = Pair.getValue().Score;
Function.ScoresGeoMean *= Score;
const auto G = Grade::judge(Score);
++(Function.GradeHisto[G]);
Function.PerDistributionData[Distribution].Grade = G;
}
}
for (FunctionData &Function : Functions) {
const auto &GradeHisto = Function.GradeHisto;
const size_t Votes =
std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
const size_t MedianVote = Votes / 2;
size_t CountedVotes = 0;
Grade::GradeEnum MedianGrade = Grade::BAD;
for (size_t I = 0; I < GradeHisto.size(); ++I) {
CountedVotes += GradeHisto[I];
if (CountedVotes > MedianVote) {
MedianGrade = Grade::GradeEnum(I);
break;
}
}
Function.FinalGrade = MedianGrade;
}
}
} // namespace automemcpy
} // namespace llvm
|