File: statistics.cpp

/**
 * \file statistics.cpp
 *
 * Contains implementations of statistical functions
 *
 */

#include "statistics.hpp"

namespace vg {


double median(std::vector<int> &v) {
    size_t n = v.size() / 2;
    std::nth_element(v.begin(), v.begin()+n, v.end());
    int vn = v[n];
    if (v.size()%2 == 1) {
        return vn;
    } else {
        std::nth_element(v.begin(), v.begin()+n-1, v.end());
        return 0.5*(vn+v[n-1]);
    }
}
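
// Example (illustrative): for v = {3, 1, 2} the result is 2.0; for
// v = {4, 1, 3, 2} it is 0.5 * (2 + 3) = 2.5. Note that the input vector is
// partially reordered in place.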

// from the Python example here:
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
void wellford_update(size_t& count, double& mean, double& M2, double new_val) {
    ++count;
    double delta = new_val - mean;
    mean += delta / (double)count;
    double delta2 = new_val - mean;
    M2 += delta * delta2;
}

pair<double, double> wellford_mean_var(size_t count, double mean, double M2, bool sample_variance) {
    if (count == 0 || (sample_variance && count == 1)) {
        return make_pair(nan(""), nan(""));
    } else {
        return make_pair(mean, M2 / (double)(sample_variance ? count - 1 : count));
    }
}
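
// Example (an illustrative sketch of the intended usage): accumulate a
// running mean and variance with the Welford state above.
//
//   size_t count = 0;
//   double mean = 0.0, M2 = 0.0;
//   for (double val : {1.0, 2.0, 3.0}) {
//       wellford_update(count, mean, M2, val);
//   }
//   auto stats = wellford_mean_var(count, mean, M2, true);
//   // stats.first == 2.0 (mean), stats.second == 1.0 (sample variance)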

double phi(double x1, double x2) {
    return (std::erf(x2/std::sqrt(2)) - std::erf(x1/std::sqrt(2)))/2;
}
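
// Example: phi(-1.96, 1.96) ≈ 0.95, the probability that a standard normal
// variable lands in [-1.96, 1.96].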

// Modified from qnorm function in R source:
// https://svn.r-project.org/R/trunk/src/nmath/qnorm.c
double normal_inverse_cdf(double p) {
    assert(0.0 < p && p < 1.0);
    double q, r, val;
    
    q = p - 0.5;
    
    /*-- use AS 241 --- */
    /* double ppnd16_(double *p, long *ifault)*/
    /*      ALGORITHM AS241  APPL. STATIST. (1988) VOL. 37, NO. 3
     
     Produces the normal deviate Z corresponding to a given lower
     tail area of P; Z is accurate to about 1 part in 10**16.
     
     (original fortran code used PARAMETER(..) for the coefficients
     and provided hash codes for checking them...)
     */
    if (fabs(q) <= .425) {/* 0.075 <= p <= 0.925 */
        r = .180625 - q * q;
        val =
        q * (((((((r * 2509.0809287301226727 +
                   33430.575583588128105) * r + 67265.770927008700853) * r +
                 45921.953931549871457) * r + 13731.693765509461125) * r +
               1971.5909503065514427) * r + 133.14166789178437745) * r +
             3.387132872796366608)
        / (((((((r * 5226.495278852854561 +
                 28729.085735721942674) * r + 39307.89580009271061) * r +
               21213.794301586595867) * r + 5394.1960214247511077) * r +
             687.1870074920579083) * r + 42.313330701600911252) * r + 1.);
    }
    else { /* closer than 0.075 from {0,1} boundary */
        
        /* r = min(p, 1-p) < 0.075 */
        if (q > 0)
            r = 1.0 - p;
        else
            r = p;
        
        r = sqrt(- log(r));
        /* r = sqrt(-log(r))  <==>  min(p, 1-p) = exp( - r^2 ) */
        
        if (r <= 5.) { /* <==> min(p,1-p) >= exp(-25) ~= 1.3888e-11 */
            r += -1.6;
            val = (((((((r * 7.7454501427834140764e-4 +
                         .0227238449892691845833) * r + .24178072517745061177) *
                       r + 1.27045825245236838258) * r +
                      3.64784832476320460504) * r + 5.7694972214606914055) *
                    r + 4.6303378461565452959) * r +
                   1.42343711074968357734)
            / (((((((r *
                     1.05075007164441684324e-9 + 5.475938084995344946e-4) *
                    r + .0151986665636164571966) * r +
                   .14810397642748007459) * r + .68976733498510000455) *
                 r + 1.6763848301838038494) * r +
                2.05319162663775882187) * r + 1.);
        }
        else { /* very close to  0 or 1 */
            r += -5.;
            val = (((((((r * 2.01033439929228813265e-7 +
                         2.71155556874348757815e-5) * r +
                        .0012426609473880784386) * r + .026532189526576123093) *
                      r + .29656057182850489123) * r +
                     1.7848265399172913358) * r + 5.4637849111641143699) *
                   r + 6.6579046435011037772)
            / (((((((r *
                     2.04426310338993978564e-15 + 1.4215117583164458887e-7)*
                    r + 1.8463183175100546818e-5) * r +
                   7.868691311456132591e-4) * r + .0148753612908506148525)
                 * r + .13692988092273580531) * r +
                .59983220655588793769) * r + 1.);
        }
        
        if(q < 0.0)
            val = -val;
        /* return (q >= 0.)? r : -r ;*/
    }
    return val;
}
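
// Example: normal_inverse_cdf(0.5) == 0.0 by symmetry, and
// normal_inverse_cdf(0.975) ≈ 1.96, the usual two-sided 95% critical value.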

// https://stackoverflow.com/a/19039500/238609
double slope(const std::vector<double>& x, const std::vector<double>& y) {
    const auto n    = x.size();
    const auto s_x  = std::accumulate(x.begin(), x.end(), 0.0);
    const auto s_y  = std::accumulate(y.begin(), y.end(), 0.0);
    const auto s_xx = std::inner_product(x.begin(), x.end(), x.begin(), 0.0);
    const auto s_xy = std::inner_product(x.begin(), x.end(), y.begin(), 0.0);
    const auto a    = (n * s_xy - s_x * s_y) / (n * s_xx - s_x * s_x);
    return a;
}
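
// Example (illustrative): for points that lie exactly on y = 2x + 1,
//
//   std::vector<double> xs = {0.0, 1.0, 2.0, 3.0};
//   std::vector<double> ys = {1.0, 3.0, 5.0, 7.0};
//   double a = slope(xs, ys);   // a == 2.0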

//https://stats.stackexchange.com/a/7459/14524
// returns alpha parameter of zipf distribution
double fit_zipf(const vector<double>& y) {
    // the input is the frequency of each rank, in rank order
    // fit a log-log (power law) model by linear regression
    assert(y.size());
    vector<double> ly(y.size());
    for (size_t i = 0; i < ly.size(); ++i) {
        ly[i] = log(y[i]);
    }
    vector<double> lx(y.size());
    for (size_t i = 1; i <= lx.size(); ++i) {
        lx[i-1] = log(i);
    }
    return -slope(lx, ly);
}
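
// Example (illustrative): for frequencies following y_i = 1 / i, i.e. Zipf
// with alpha = 1,
//
//   vector<double> y = {1.0, 0.5, 1.0 / 3.0, 0.25};
//   double alpha = fit_zipf(y);   // alpha ≈ 1.0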

double fit_fixed_shape_max_exponential(const vector<double>& x, double shape, double tolerance) {
    
    // Fit S for a fixed N with the density of the maximum of N exponential variables
    //
    //   NS exp(-Sx) (1 - exp(-Sx))^(N - 1)
    //
    // where S is the rate
    // where N is the shape
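    //
    // For reference (a derivation from the density above, not spelled out in
    // the original): the log likelihood of n observations is
    //
    //   L(S) = n log(N S) - S sum_i x_i + (N - 1) sum_i log(1 - exp(-S x_i))
    //
    // Its first derivative, which the code below splits into positive and
    // negative summands, is
    //
    //   dL/dS = n / S + (N - 1) sum_i x_i exp(-S x_i) / (1 - exp(-S x_i)) - sum_i x_i
    //
    // and its second derivative is everywhere negative:
    //
    //   d2L/dS2 = -n / S^2 - (N - 1) sum_i x_i^2 exp(-S x_i) / (1 - exp(-S x_i))^2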
    
    double x_sum = 0;
    double x_max = numeric_limits<double>::lowest();
    for (const double& val : x) {
        x_sum += val;
        x_max = max(x_max, val);
    }
    
    // compute the log of the 1st and 2nd derivatives for the log likelihood (split up by positive and negative summands)
    // we have to do it this wonky way because the exponentiated numbers get very large and cause overflow otherwise
    
    double log_deriv_neg_part = log(x_sum);
    
    function<double(double)> log_deriv_pos_part = [&](double rate) {
        double accumulator = numeric_limits<double>::lowest();
        for (const double& val : x) {
            if (val > 0.0) {
                // should always be > 0, but just so we don't blow up on some very small graphs
                accumulator = add_log(accumulator, log(val) - rate * val - log(1.0 - exp(-rate * val)));
            }
        }
        accumulator += log(shape - 1.0);
        return add_log(accumulator, log(x.size() / rate));
    };
    
    function<double(double)> log_deriv2_neg_part = [&](double rate) {
        double accumulator = numeric_limits<double>::lowest();
        for (const double& val : x) {
            if (val > 0.0) {
                // should always be > 0, but just so we don't blow up on some very small graphs
                accumulator = add_log(accumulator, 2.0 * log(val) - rate * val - 2.0 * log(1.0 - exp(-rate * val)));
            }
        }
        accumulator += log(shape - 1.0);
        return add_log(accumulator, log(x.size() / (rate * rate)));
    };
    
    // set a maximum number of iterations so this doesn't get stuck in an
    // infinite loop when numerical issues prevent convergence
    size_t max_iters = 1000;
    size_t iter = 0;
    
    // use Newton's method to find the MLE
    double rate = 1.0 / x_max;
    double prev_rate = rate * (1.0 + 10.0 * tolerance);
    while (abs(prev_rate / rate - 1.0) > tolerance && iter < max_iters) {
        prev_rate = rate;
        double log_d2 = log_deriv2_neg_part(rate);
        double log_d_pos = log_deriv_pos_part(rate);
        double log_d_neg = log_deriv_neg_part;
        // determine if the value of the 1st deriv is positive or negative, and compute the
        // whole ratio to the 2nd deriv from the positive and negative parts accordingly
        if (log_d_pos > log_d_neg) {
            rate += exp(subtract_log(log_d_pos, log_d_neg) - log_d2);
        }
        else {
            rate -= exp(subtract_log(log_d_neg, log_d_pos) - log_d2);
        }
        ++iter;
    }
    return rate;
}


double fit_fixed_rate_max_exponential(const vector<double>& x, double rate, double tolerance) {
        
    // Fit N for a fixed S with the density of the maximum of N exponential variables
    //
    //   NS exp(-Sx) (1 - exp(-Sx))^(N - 1)
    //
    // where S is the rate
    // where N is the shape
    
    function<double(double)> log_likelihood = [&](double shape) {
        return max_exponential_log_likelihood(x, rate, shape);
    };
    // expand the interval until we find a region where the likelihood decreases
    // as the shape increases
    double max_shape = 1.0;
    double max_shape_likelihood = log_likelihood(max_shape);
    double prev_max_shape_likelihood = max_shape_likelihood - 1.0;
    while (prev_max_shape_likelihood <= max_shape_likelihood) {
        prev_max_shape_likelihood = max_shape_likelihood;
        max_shape *= 2.0;
        max_shape_likelihood = log_likelihood(max_shape);
    }
    
    // use golden section search to find the maximum
    return golden_section_search(log_likelihood, 0.0, max_shape, tolerance);
}

pair<double, double> fit_max_exponential(const vector<double>& x,
                                         double tolerance) {

    // set a maximum number of iterations so this doesn't get stuck in an
    // infinite loop when numerical issues prevent convergence
    size_t max_iters = 1000;
    size_t iter = 0;
    
    // alternate maximizing shape and rate until convergence
    double shape = 1.0;
    double rate = fit_fixed_shape_max_exponential(x, shape, tolerance / 2.0);
    double prev_shape = shape + 10.0 * tolerance;
    double prev_rate = rate + 10.0 * tolerance;
    while ((abs(prev_rate / rate - 1.0) > tolerance / 2.0
            || abs(prev_shape / shape - 1.0) > tolerance / 2.0)
           && iter < max_iters) {
        prev_shape = shape;
        prev_rate = rate;
        
        shape = fit_fixed_rate_max_exponential(x, rate, tolerance / 2.0);
        rate = fit_fixed_shape_max_exponential(x, shape, tolerance / 2.0);
        
        ++iter;
    }
    
    return pair<double, double>(rate, shape);
}

//tuple<double, double, double> fit_offset_max_exponential(const vector<double>& x,
//                                                         const function<double(double)>& shape_prior,
//                                                         double tolerance) {
//
//    // the max log likelihood of the data for a fixed location parameter
//    function<double(double)> fit_log_likelihood = [&](double loc) {
//        vector<double> x_offset(x.size());
//        for (size_t i = 0; i < x.size(); ++i) {
//            x_offset[i] = x[i] - loc;
//        }
//        pair<double, double> params = fit_max_exponential(x_offset);
//        return max_exponential_log_likelihood(x, params.first, params.second, loc) + log(shape_prior(params.second));
//    };
//
//    // the maximum value of location so that all data points are in the support
//    double max_loc = *min_element(x.begin(), x.end());
//    // search with exponentially expanding windows backward to find the window
//    // that contains the highest likelihood MLE for the location
//    double min_loc = max_loc - 1.0;
//    double log_likelihood = numeric_limits<double>::lowest();
//    double probe_log_likelihood = fit_log_likelihood(min_loc);
//    while (probe_log_likelihood > log_likelihood) {
//        log_likelihood = probe_log_likelihood;
//        double probe_loc = max_loc - 2.0 * (max_loc - min_loc);
//        probe_log_likelihood = fit_log_likelihood(probe_loc);
//        min_loc = probe_loc;
//    }
//
//    // find the MLE location
//    double location = golden_section_search(fit_log_likelihood, min_loc, max_loc, tolerance);
//
//    // fit the scale and shape given the location
//    vector<double> x_offset(x.size());
//    for (size_t i = 0; i < x.size(); ++i) {
//        x_offset[i] = x[i] - location;
//    }
//    auto params = fit_max_exponential(x_offset);
//
//    return make_tuple(params.first, params.second, location);
//}

double max_exponential_log_likelihood(const vector<double>& x, double rate, double shape,
                                      double location) {
    double accumulator_1 = 0.0;
    double accumulator_2 = 0.0;
    for (const double& val : x) {
        if (val <= location) {
            // this term should be -inf, but skipping it avoids some numerical problems
            continue;
        }
        accumulator_1 += log(1.0 - exp(-rate * (val - location)));
        accumulator_2 += (val - location);
    }
    return x.size() * log(rate * shape) - rate * accumulator_2 + (shape - 1.0) * accumulator_1;
}

pair<double, double> fit_weibull(const vector<double>& x) {
    // Method adapted from Datsiou & Overend (2018) Weibull parameter estimation and
    // goodness-of-fit for glass strength data
    
    assert(x.size() >= 3);
    
    vector<double> x_local = x;
    sort(x_local.begin(), x_local.end());
    
    // regress the transformed ordered data points against the inverse CDF
    vector<vector<double>> X(x_local.size() - 1, vector<double>(2, 1.0));
    vector<double> y(X.size());
    for (size_t i = 1; i < x_local.size(); ++i) {
        X[i - 1][1] = log(x_local[i]);
        y[i - 1] = log(-log(1.0 - double(i) / double(x.size())));
    }
    vector<double> coefs = regress(X, y);
    
    // convert the regression coefficients into the Weibull parameters: the fitted
    // model is log(-log(1 - F)) = shape * log(x) - shape * log(scale), so
    // scale = exp(-coefs[0] / coefs[1]) and shape = coefs[1]
    return make_pair(exp(-coefs[0] / coefs[1]), coefs[1]);
}
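
// Example (illustrative): the returned pair is (scale, shape). For three or
// more observations,
//
//   vector<double> data = {0.4, 0.7, 1.2, 1.9, 2.3};
//   pair<double, double> params = fit_weibull(data);
//   double scale = params.first, shape = params.second;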

tuple<double, double, double> fit_offset_weibull(const vector<double>& x,
                                                 double tolerance) {
    
    // the max log likelihood of the data for a fixed location parameter
    function<double(double)> fit_log_likelihood = [&](double loc) {
        vector<double> x_offset(x.size());
        for (size_t i = 0; i < x.size(); ++i) {
            x_offset[i] = x[i] - loc;
        }
        pair<double, double> params = fit_weibull(x_offset);
        return weibull_log_likelihood(x, params.first, params.second, loc);
    };
    
    // the maximum value of location so that all data points are in the support
    double max_loc = *min_element(x.begin(), x.end());
    
    // search with exponentially expanding windows backward to find the window
    // that contains the highest likelihood MLE for the location
    double min_loc = max_loc - 1.0;
    double log_likelihood = numeric_limits<double>::lowest();
    double probe_log_likelihood = fit_log_likelihood(min_loc);
    while (probe_log_likelihood > log_likelihood) {
        log_likelihood = probe_log_likelihood;
        double probe_loc = max_loc - 2.0 * (max_loc - min_loc);
        probe_log_likelihood = fit_log_likelihood(probe_loc);
        min_loc = probe_loc;
    }
    
    // find the MLE location
    double location = golden_section_search(fit_log_likelihood, min_loc, max_loc, tolerance);
    
    // fit the scale and shape given the location
    vector<double> x_offset(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        x_offset[i] = x[i] - location;
    }
    auto params = fit_weibull(x_offset);
    
    return make_tuple(params.first, params.second, location);
}

double weibull_log_likelihood(const vector<double>& x, double scale, double shape,
                              double location) {
    double sum_1 = 0.0, sum_2 = 0.0;
    for (const double& val : x) {
        sum_1 += log(val - location);
        sum_2 += pow((val - location) / scale, shape);
    }
    return x.size() * (log(shape) - shape * log(scale)) + (shape - 1.0) * sum_1 - sum_2;
}

double golden_section_search(const function<double(double)>& f, double x_min, double x_max,
                             double tolerance) {
     
    const static double inv_phi = (sqrt(5.0) - 1.0) / 2.0;
    
    // the number of steps needed to achieve the required precision (precalculating avoids
    // fiddly floating point issues on the breakout condition)
    size_t steps = size_t(ceil(log(tolerance / (x_max - x_min)) / log(inv_phi)));
    
    // the two interior points we will evaluate the function at
    double x_lo = x_min + inv_phi * inv_phi * (x_max - x_min);
    double x_hi = x_min + inv_phi * (x_max - x_min);
    
    // the function value at the two interior points
    double f_lo = f(x_lo);
    double f_hi = f(x_hi);
    
    for (size_t step = 0; step < steps; ++step) {
        if (f_lo < f_hi) {
            // there is a max in one of the right two sections
            x_min = x_lo;
            x_lo = x_hi;
            x_hi = x_min + inv_phi * (x_max - x_min);
            f_lo = f_hi;
            f_hi = f(x_hi);
        }
        else {
            // there is a max in one of the left two sections
            x_max = x_hi;
            x_hi = x_lo;
            x_lo = x_min + inv_phi * inv_phi * (x_max - x_min);
            f_hi = f_lo;
            f_lo = f(x_lo);
        }
    }
    
    // return the midpoint of the interval we narrowed down to
    if (f_lo > f_hi) {
        return (x_min + x_hi) / 2.0;
    }
    else {
        return (x_lo + x_max) / 2.0;
    }
}
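
// Example (illustrative): maximizing a concave function over an interval,
//
//   double argmax = golden_section_search(
//       [](double x) { return -(x - 2.0) * (x - 2.0); }, 0.0, 5.0, 1e-6);
//   // argmax ≈ 2.0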

double phred_to_prob(uint8_t phred) {
    // Use a statically initialized lookup table
    static std::vector<double> prob_by_phred([](void) -> std::vector<double> {
        std::vector<double> to_return;
        to_return.reserve((int)numeric_limits<uint8_t>::max() + 1);
        for (int i = 0; i <= numeric_limits<uint8_t>::max(); i++) {
            to_return.push_back(phred_to_prob((double) i));
        }
        return to_return;
    }());
    
    // Look up in it
    return prob_by_phred[phred];
}

double phred_for_at_least_one(size_t p, size_t n) {

    /**
     * Assume that we have n <= MAX_AT_LEAST_ONE_EVENTS independent events with probability p each.
     * Let x be the AT_LEAST_ONE_PRECISION most significant bits of p. Then
     *
     *   phred_at_least_one[(n << AT_LEAST_ONE_PRECISION) + x]
     *
     * is an approximate phred score of at least one event occurring.
     *
     * We exploit the magical thread-safety of static local initialization to
     * fill this in exactly once when needed.
     */
    static std::vector<double> phred_at_least_one([](void) -> std::vector<double> {
        // Initialize phred_at_least_one by copying from the result of this function.
        std::vector<double> to_return;
        size_t values = static_cast<size_t>(1) << AT_LEAST_ONE_PRECISION;
        to_return.resize((MAX_AT_LEAST_ONE_EVENTS + 1) * values, 0.0);
        for (size_t n = 1; n <= MAX_AT_LEAST_ONE_EVENTS; n++) {
            for (size_t p = 0; p < values; p++) {
                // Because each p represents a range of probabilities, we choose a value
                // in the middle for the approximation.
                double probability = (2 * p + 1) / (2.0 * values);
                // Phred for at least one out of n.
                to_return[(n << AT_LEAST_ONE_PRECISION) + p] = prob_to_phred(1.0 - std::pow(1.0 - probability, n));
            }
        }
        return to_return;
    }());
    
    // Make sure we don't go out of bounds.
    assert(n <= MAX_AT_LEAST_ONE_EVENTS);
    
    p >>= 8 * sizeof(size_t) - AT_LEAST_ONE_PRECISION;
    return phred_at_least_one[(n << AT_LEAST_ONE_PRECISION) + p];
}
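
// Example (illustrative, assuming p is a fixed-point fraction spanning the
// full size_t range, of which only the AT_LEAST_ONE_PRECISION most
// significant bits are used):
//
//   size_t half = std::numeric_limits<size_t>::max() / 2 + 1;   // represents ~0.5
//   double phred = phred_for_at_least_one(half, 2);
//   // approximately prob_to_phred(1.0 - 0.5 * 0.5) = prob_to_phred(0.75)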

// This is just like phred_for_at_least_one but we don't prob_to_phred
// TODO: combine the code somehow?
double prob_for_at_least_one(size_t p, size_t n) {

    /**
     * Assume that we have n <= MAX_AT_LEAST_ONE_EVENTS independent events with probability p each.
     * Let x be the AT_LEAST_ONE_PRECISION most significant bits of p. Then
     *
     *   prob_at_least_one[(n << AT_LEAST_ONE_PRECISION) + x]
     *
     * is an approximate probability of at least one event occurring.
     *
     * We exploit the magical thread-safety of static local initialization to
     * fill this in exactly once when needed.
     */
    static std::vector<double> prob_at_least_one([](void) -> std::vector<double> {
        // Initialize prob_at_least_one by copying from the result of this function.
        std::vector<double> to_return;
        size_t values = static_cast<size_t>(1) << AT_LEAST_ONE_PRECISION;
        to_return.resize((MAX_AT_LEAST_ONE_EVENTS + 1) * values, 0.0);
        for (size_t n = 1; n <= MAX_AT_LEAST_ONE_EVENTS; n++) {
            for (size_t p = 0; p < values; p++) {
                // Because each p represents a range of probabilities, we choose a value
                // in the middle for the approximation.
                double probability = (2 * p + 1) / (2.0 * values);
                // Prob for at least one out of n.
                to_return[(n << AT_LEAST_ONE_PRECISION) + p] = 1.0 - std::pow(1.0 - probability, n);
            }
        }
        return to_return;
    }());
    
    // Make sure we don't go out of bounds.
    assert(n <= MAX_AT_LEAST_ONE_EVENTS);
    
    p >>= 8 * sizeof(size_t) - AT_LEAST_ONE_PRECISION;
    return prob_at_least_one[(n << AT_LEAST_ONE_PRECISION) + p];
}

vector<vector<double>> transpose(const vector<vector<double>>& A) {
    vector<vector<double>> AT(A.front().size());
    for (size_t i = 0; i < AT.size(); ++i) {
        AT[i].resize(A.size());
        for (size_t j = 0; j < A.size(); ++j) {
            AT[i][j] = A[j][i];
        }
    }
    return AT;
}

vector<vector<double>> matrix_multiply(const vector<vector<double>>& A,
                                       const vector<vector<double>>& B) {
    assert(A.front().size() == B.size());
    
    vector<vector<double>> AB(A.size());
    for (size_t i = 0; i < A.size(); ++i) {
        AB[i].resize(B.front().size(), 0.0);
        for (size_t j = 0; j < B.front().size(); ++j) {
            for (size_t k = 0; k < B.size(); ++k) {
                AB[i][j] += A[i][k] * B[k][j];
            }
        }
    }
    return AB;
}

vector<double> matrix_multiply(const vector<vector<double>>& A,
                               const vector<double>& b) {
    assert(A.front().size() == b.size());
    
    vector<double> Ab(A.size(), 0.0);
    for (size_t i = 0; i < A.size(); ++i) {
        for (size_t j = 0; j < A.front().size(); ++j) {
            Ab[i] += A[i][j] * b[j];
        }
    }
    return Ab;
}

vector<vector<double>> matrix_invert(const vector<vector<double>>& A) {
    
    // invert by Gaussian elimination
    
    assert(A.front().size() == A.size());
    
    vector<vector<double>> A_inv(A.size());
    
    for (size_t i = 0; i < A.size(); ++i) {
        A_inv[i].resize(A.size(), 0.0);
        A_inv[i][i] = 1.0;
    }
    
    // a non-const local copy
    auto A_loc = A;
    
    // forward loop, make upper triangular
    
    for (int64_t i = 0; i < A_loc.size(); ++i) {
        int64_t ii = i;
        while (ii < A_loc.size() && A_loc[ii][i] == 0.0) {
            ++ii;
        }
        if (ii == A_loc.size()) {
            throw std::runtime_error("error: matrix is not invertible!");
        }
        swap(A_loc[i],A_loc[ii]);
        swap(A_inv[i], A_inv[ii]);
                
        // make the diagonal entry 1
        double factor = A_loc[i][i];
        for (int64_t j = 0; j < A_loc.size(); ++j) {
            A_loc[i][j] /= factor;
            A_inv[i][j] /= factor;
        }
        
        // zero out the entries below the diagonal in this column
        for (ii = i + 1; ii < A_loc.size(); ++ii) {
            factor = A_loc[ii][i];
            for (size_t j = 0; j < A_loc.size(); ++j) {
                A_loc[ii][j] -= factor * A_loc[i][j];
                A_inv[ii][j] -= factor * A_inv[i][j];
            }
            
        }
    }
    
    // backward loop, make identity
    
    for (int64_t i = A_loc.size() - 1; i >= 0; --i) {
        // zero out the entries above the diagonal in this column
        for (int64_t ii = i - 1; ii >= 0; --ii) {
            double factor = A_loc[ii][i];
            for (size_t j = 0; j < A_loc.size(); ++j) {
                A_loc[ii][j] -= factor * A_loc[i][j];
                A_inv[ii][j] -= factor * A_inv[i][j];
            }
        }
    }
    
    return A_inv;
}

vector<double> regress(const vector<vector<double>>& X, vector<double>& y) {
    auto X_t = transpose(X);
    return matrix_multiply(matrix_multiply(matrix_invert(matrix_multiply(X_t, X)), X_t), y);
}
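
// Example (illustrative): ordinary least squares via the normal equations
// implemented above. With an intercept column in the design matrix,
//
//   vector<vector<double>> X = {{1.0, 0.0}, {1.0, 1.0}, {1.0, 2.0}};
//   vector<double> y = {1.0, 3.0, 5.0};
//   vector<double> beta = regress(X, y);   // beta ≈ {1.0, 2.0}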

}