1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
|
/*
* Created by Joachim on 16/04/2019.
* Adapted from donated nonius code.
*
* Distributed under the Boost Software License, Version 1.0. (See accompanying
* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*/
// Statistical analysis tools
#ifndef TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED
#define TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED
#include "../catch_clock.hpp"
#include "../catch_estimate.hpp"
#include "../catch_outlier_classification.hpp"
#include <algorithm>
#include <functional>
#include <vector>
#include <iterator>
#include <numeric>
#include <tuple>
#include <cmath>
#include <utility>
#include <cstddef>
#include <random>
namespace Catch {
namespace Benchmark {
namespace Detail {
using sample = std::vector<double>;
double weighted_average_quantile(int k, int q, std::vector<double>::iterator first, std::vector<double>::iterator last);
template <typename Iterator>
OutlierClassification classify_outliers(Iterator first, Iterator last) {
std::vector<double> copy(first, last);
auto q1 = weighted_average_quantile(1, 4, copy.begin(), copy.end());
auto q3 = weighted_average_quantile(3, 4, copy.begin(), copy.end());
auto iqr = q3 - q1;
auto los = q1 - (iqr * 3.);
auto lom = q1 - (iqr * 1.5);
auto him = q3 + (iqr * 1.5);
auto his = q3 + (iqr * 3.);
OutlierClassification o;
for (; first != last; ++first) {
auto&& t = *first;
if (t < los) ++o.low_severe;
else if (t < lom) ++o.low_mild;
else if (t > his) ++o.high_severe;
else if (t > him) ++o.high_mild;
++o.samples_seen;
}
return o;
}
template <typename Iterator>
double mean(Iterator first, Iterator last) {
auto count = last - first;
double sum = std::accumulate(first, last, 0.);
return sum / count;
}
template <typename URng, typename Iterator, typename Estimator>
sample resample(URng& rng, int resamples, Iterator first, Iterator last, Estimator& estimator) {
auto n = last - first;
std::uniform_int_distribution<decltype(n)> dist(0, n - 1);
sample out;
out.reserve(resamples);
std::generate_n(std::back_inserter(out), resamples, [n, first, &estimator, &dist, &rng] {
std::vector<double> resampled;
resampled.reserve(n);
std::generate_n(std::back_inserter(resampled), n, [first, &dist, &rng] { return first[dist(rng)]; });
return estimator(resampled.begin(), resampled.end());
});
std::sort(out.begin(), out.end());
return out;
}
template <typename Estimator, typename Iterator>
sample jackknife(Estimator&& estimator, Iterator first, Iterator last) {
auto n = last - first;
auto second = std::next(first);
sample results;
results.reserve(n);
for (auto it = first; it != last; ++it) {
std::iter_swap(it, first);
results.push_back(estimator(second, last));
}
return results;
}
inline double normal_cdf(double x) {
return std::erfc(-x / std::sqrt(2.0)) / 2.0;
}
double erfc_inv(double x);
double normal_quantile(double p);
template <typename Iterator, typename Estimator>
Estimate<double> bootstrap(double confidence_level, Iterator first, Iterator last, sample const& resample, Estimator&& estimator) {
auto n_samples = last - first;
double point = estimator(first, last);
// Degenerate case with a single sample
if (n_samples == 1) return { point, point, point, confidence_level };
sample jack = jackknife(estimator, first, last);
double jack_mean = mean(jack.begin(), jack.end());
double sum_squares, sum_cubes;
std::tie(sum_squares, sum_cubes) = std::accumulate(jack.begin(), jack.end(), std::make_pair(0., 0.), [jack_mean](std::pair<double, double> sqcb, double x) -> std::pair<double, double> {
auto d = jack_mean - x;
auto d2 = d * d;
auto d3 = d2 * d;
return { sqcb.first + d2, sqcb.second + d3 };
});
double accel = sum_cubes / (6 * std::pow(sum_squares, 1.5));
int n = static_cast<int>(resample.size());
double prob_n = std::count_if(resample.begin(), resample.end(), [point](double x) { return x < point; }) / (double)n;
// degenerate case with uniform samples
if (prob_n == 0) return { point, point, point, confidence_level };
double bias = normal_quantile(prob_n);
double z1 = normal_quantile((1. - confidence_level) / 2.);
auto cumn = [n](double x) -> int {
return std::lround(normal_cdf(x) * n); };
auto a = [bias, accel](double b) { return bias + b / (1. - accel * b); };
double b1 = bias + z1;
double b2 = bias - z1;
double a1 = a(b1);
double a2 = a(b2);
auto lo = (std::max)(cumn(a1), 0);
auto hi = (std::min)(cumn(a2), n - 1);
return { point, resample[lo], resample[hi], confidence_level };
}
double outlier_variance(Estimate<double> mean, Estimate<double> stddev, int n);
struct bootstrap_analysis {
Estimate<double> mean;
Estimate<double> standard_deviation;
double outlier_variance;
};
bootstrap_analysis analyse_samples(double confidence_level, int n_resamples, std::vector<double>::iterator first, std::vector<double>::iterator last);
} // namespace Detail
} // namespace Benchmark
} // namespace Catch
#endif // TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED
|