File: test_statistics.h

package info (click to toggle)
cataclysm-dda 0.H-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 710,808 kB
  • sloc: cpp: 524,019; python: 11,580; sh: 1,228; makefile: 1,169; xml: 507; javascript: 150; sql: 56; exp: 41; perl: 37
file content (231 lines) | stat: -rw-r--r-- 8,259 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#pragma once
#ifndef CATA_TESTS_TEST_STATISTICS_H
#define CATA_TESTS_TEST_STATISTICS_H

#include <algorithm>
#include <cmath>
#include <iosfwd>
#include <limits>
#include <string>
#include <type_traits>
#include <vector>

#include "cata_catch.h"

// Z-values (standard normal quantiles) for common confidence levels.
// inline so the header defines each constant exactly once program-wide.
inline constexpr double Z95 = 1.96;
inline constexpr double Z99 = 2.576;
inline constexpr double Z99_9 = 3.291;
inline constexpr double Z99_99 = 3.891;
inline constexpr double Z99_999 = 4.5;
inline constexpr double Z99_999_9 = 5.0;

// Useful to specify a range using midpoint +/- ε which is easier to parse how
// wide a range actually is vs just upper and lower
struct epsilon_threshold {
    double midpoint;
    double epsilon;
};

// Upper/lower bound threshold useful for asymmetric thresholds
struct upper_lower_threshold {
    double lower_thresh;
    double upper_thresh;
};

// We cache the margin of error, so when adding a new value we must invalidate
// it so that it gets recalculated again on the next query.
inline constexpr double invalid_err = -1;

// Accumulates samples of type T and answers statistical questions about them:
// min/max/sum/mean, variance/stddev, and margin-of-error based
// confidence-interval tests.  T == bool selects adjusted Wald error (binary
// trials); any other T selects standard error (continuous data).
template<typename T>
class statistics
{
    private:
        int _types;            // free-form counter maintained via new_type()
        int _n;                // number of samples added so far
        double _sum;
        mutable double _error; // cached margin of error; invalid_err when stale
        const double Z_;       // Z-value selecting the confidence level
        const double Zsq_;     // Z_ squared, precomputed for the Wald formula
        T _max;
        T _min;
        std::vector< T > samples;
    public:
        // Z selects the confidence level used by margin_of_error() and the
        // interval tests below; defaults to the 99.9% quantile.
        explicit statistics( const double Z = Z99_9 ) :
            _types( 0 ), _n( 0 ), _sum( 0 ), _error( invalid_err ),
            Z_( Z ), Zsq_( Z * Z ),
            // lowest(), not min(): for floating-point T, min() is the smallest
            // *positive* value, which would make max() wrong whenever every
            // sample is negative.  For integral types and bool they coincide.
            _max( std::numeric_limits<T>::lowest() ),
            _min( std::numeric_limits<T>::max() ) {}

        void new_type() {
            _types++;
        }
        // Record one sample, updating the running aggregates and invalidating
        // the cached margin of error.
        void add( T new_val ) {
            _error = invalid_err;
            _n++;
            _sum += new_val;
            _max = std::max( _max, new_val );
            _min = std::min( _min, new_val );
            samples.push_back( new_val );
        }

        // Adjusted Wald error is only valid for a discrete binary test. Note
        // because error takes into account population, it is only valid to
        // test against the upper/lower bound.
        //
        // The goal here is to get the most accurate statistics about the
        // smallest sample size feasible.  The tests end up getting run many
        // times over a short period, so any real issue may sometimes get a
        // false positive, but over a series of runs will get shaken out in an
        // obvious way.
        //
        // Outside of this class, this should only be used for debugging
        // purposes.
        template<typename U = T>
        std::enable_if_t< std::is_same_v< U, bool >, double >
        margin_of_error() const {
            if( _error != invalid_err ) {
                return _error;
            }
            // Implementation of outline from https://measuringu.com/ci-five-steps/
            const double adj_numerator = ( Zsq_ / 2 ) + _sum;
            const double adj_denominator = Zsq_ + _n;
            const double adj_proportion = adj_numerator / adj_denominator;
            const double a = adj_proportion * ( 1.0 - adj_proportion );
            const double b = a / adj_denominator;
            const double c = std::sqrt( b );
            _error = c * Z_;
            return _error;
        }
        // Standard error is intended to be used with continuous data samples.
        // We're using an approximation here so it is only appropriate to use
        // the upper/lower bound to test for reasons similar to adjusted Wald
        // error.
        // Outside of this class, this should only be used for debugging purposes.
        // https://measuringu.com/ci-five-steps/
        template<typename U = T>
        std::enable_if_t < ! std::is_same_v< U, bool >, double >
        margin_of_error() const {
            if( _error != invalid_err ) {
                return _error;
            }
            const double std_err = stddev() / std::sqrt( _n );
            _error = std_err * Z_;
            return _error;
        }

        /** Use to continue testing until we are sure whether the result is
         * inside or outside the target.
         *
         * Returns true if the confidence interval partially overlaps the target region.
         */
        bool uncertain_about( const epsilon_threshold &t ) const {
            return !test_threshold( t ) && // Inside target
                   t.midpoint - t.epsilon < upper() && // Below target
                   t.midpoint + t.epsilon > lower(); // Above target
        }

        // True if the whole confidence interval lies strictly inside
        // midpoint +/- epsilon.
        bool test_threshold( const epsilon_threshold &t ) const {
            return ( t.midpoint - t.epsilon ) < lower() &&
                   ( t.midpoint + t.epsilon ) > upper();
        }
        // True if the whole confidence interval lies strictly inside
        // (lower_thresh, upper_thresh).
        bool test_threshold( const upper_lower_threshold &t ) const {
            return t.lower_thresh < lower() && t.upper_thresh > upper();
        }
        // Upper bound of the confidence interval; clamped to 1.0 for binary
        // data since a proportion cannot exceed 1.
        double upper() const {
            double result = avg() + margin_of_error();
            if constexpr( std::is_same_v<T, bool> ) {
                result = std::min( result, 1.0 );
            }
            return result;
        }
        // Lower bound of the confidence interval; clamped to 0.0 for binary
        // data since a proportion cannot be negative.
        double lower() const {
            double result = avg() - margin_of_error();
            if constexpr( std::is_same_v<T, bool> ) {
                result = std::max( result, 0.0 );
            }
            return result;
        }
        // Test if some value is a member of the confidence interval of the
        // sample
        bool test_confidence_interval( const double v ) const {
            return is_within_epsilon( v, margin_of_error() );
        }

        // True if v lies strictly within avg() +/- epsilon.
        bool is_within_epsilon( const double v, const double epsilon ) const {
            const double average = avg();
            return( average + epsilon > v ) &&
                  ( average - epsilon < v );
        }
        // Theoretically a one-pass formula is more efficient, however because
        // we keep handles onto _sum and _n as class members and calculate them
        // on the fly, a one-pass formula is unnecessary because we're already
        // one pass here.  It may not obvious that even though we're calling
        // the 'average()' function that's what is happening.
        // Precondition: at least two samples for sample variance (n - 1
        // denominator), at least one otherwise.
        double variance( const bool sample_variance = true ) const {
            double average = avg();
            double sigma_acc = 0;

            for( const T v : samples ) {
                const double diff = v - average;
                sigma_acc += diff * diff;
            }

            if( sample_variance ) {
                return sigma_acc / static_cast<double>( _n - 1 );
            }
            return sigma_acc / static_cast<double>( _n );
        }
        // We should only be interested in the sample deviation most of the
        // time because we can always get more samples.  The way we use tests,
        // we attempt to use the sample data to generalize about the
        // population.
        double stddev( const bool sample_deviation = true ) const {
            return std::sqrt( variance( sample_deviation ) );
        }

        int types() const {
            return _types;
        }
        double sum() const {
            return _sum;
        }
        T max() const {
            return _max;
        }
        T min() const {
            return _min;
        }
        // Mean of the samples.  NaN when no samples have been added.
        double avg() const {
            return _sum / static_cast<double>( _n );
        }
        int n() const {
            return _n;
        }
        const std::vector<T> &get_samples() const {
            return samples;
        }
};

// Catch2 matcher checking that an observed count is a plausible draw from a
// binomial distribution (see IsBinomialObservation below, which describes the
// normal approximation used).  Member function bodies live in the .cpp file.
class BinomialMatcher : public Catch::MatcherBase<int>
{
    public:
        // num_samples: number of trials; p: per-trial success probability;
        // max_deviation: allowed distance from the expected count, measured
        // in standard deviations.
        BinomialMatcher( int num_samples, double p, double max_deviation );
        // True when obs falls within the permitted deviation of the
        // expected count.
        bool match( const int &obs ) const override;
        // Human-readable description for Catch2 failure messages.
        std::string describe() const override;
    private:
        int num_samples_;
        double p_;
        double max_deviation_;
        double expected_;   // presumably the expected count num_samples_*p_ — set in the .cpp ctor
        double margin_;     // presumably the allowed absolute deviation — set in the .cpp ctor
};

// Factory producing a BinomialMatcher.  Asserts that a value is a plausible
// observation from a binomial distribution: the matcher uses a normal
// approximation to the binomial and accepts observations up to max_deviation
// standard deviations away from the expected count.
inline BinomialMatcher IsBinomialObservation(
    const int num_samples, const double p, const double max_deviation = Z99_999 )
{
    return BinomialMatcher{ num_samples, p, max_deviation };
}

#endif // CATA_TESTS_TEST_STATISTICS_H