File: stats.go

package info (click to toggle)
golang-golang-x-exp 0.0~git20250911.df92998-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,284 kB
  • sloc: ansic: 1,900; objc: 276; sh: 270; asm: 48; makefile: 27
file content (195 lines) | stat: -rw-r--r-- 6,118 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package stats provides basic descriptive statistics.
//
// This is intended not as a comprehensive statistics package, but
// to provide common, everyday statistical functions.
//
// As a rule of thumb, a statistical function belongs in this package
// if it would be explained in a typical high school.
//
// These functions aim to balance performance and accuracy, but some
// amount of error is inevitable in floating-point computations.
// The underlying implementations may change, resulting in small
// changes in their results from version to version. If the caller
// needs particular guarantees on accuracy and overflow behavior or
// version stability, they should use a more specialized
// implementation.
package stats

// References:
//
// Hyndman, Rob J.; Fan, Yanan (November 1996).
// "Sample Quantiles in Statistical Packages".
// American Statistician. 50 (4).
// American Statistical Association: 361–365.
// doi:10.2307/2684934. JSTOR 2684934.

import (
	"math"
	"slices"
)

// Mean returns the arithmetic mean of the values in values.
//
// Mean does not modify the slice.
//
// Mean panics if values is an empty slice.
//
// If values contains NaN, or both Inf and -Inf, it returns NaN.
// Otherwise, if values contains Inf, it returns Inf, and
// if values contains -Inf, it returns -Inf.
func Mean[F ~float64](values []F) F {
	naive, seen := meanInf(values)
	return resolveMean(naive, seen)
}

// MeanAndStdDev returns the arithmetic mean and
// sample standard deviation of values; the standard
// deviation is only defined for len(values) > 1, and
// is reported as 0 for a single finite value.
//
// MeanAndStdDev does not modify the slice.
//
// MeanAndStdDev panics if values is an empty slice.
//
// If values contains NaN, it returns NaN, NaN.
// Otherwise, if values contains both Inf and -Inf, it returns NaN, Inf.
// Otherwise, if values contains Inf, it returns Inf, Inf.
// Otherwise, if values contains -Inf, it returns -Inf, Inf.
func MeanAndStdDev[F ~float64](values []F) (F, F) {
	naive, seen := meanInf(values)
	// Use the same mean that Mean reports, so the two functions always agree.
	mean := resolveMean(naive, seen)
	switch {
	case seen&sawNaN != 0:
		return mean, F(math.NaN())
	case seen != 0:
		// At least one infinity is present: the spread is unbounded.
		return mean, F(math.Inf(1))
	}
	if len(values) == 1 {
		return mean, 0
	}
	var squaredDiffs F
	for _, v := range values {
		diff := v - mean
		squaredDiffs += diff * diff
	}
	// Divide by n-1: this is the sample, not the population, standard deviation.
	return mean, F(math.Sqrt(float64(squaredDiffs) / float64(len(values)-1)))
}

// meanInf calculates a naive mean value (the running sum divided by the
// number of values) and reports which non-finite values appear in values.
//
// meanInf panics if values is an empty slice.
func meanInf[F ~float64](values []F) (F, infinities) {
	if len(values) == 0 {
		panic("mean: empty slice")
	}
	var (
		sum  F
		seen infinities
	)
	for _, v := range values {
		switch f := float64(v); {
		case math.IsNaN(f):
			seen |= sawNaN
		case math.IsInf(f, 1):
			seen |= posInf
		case math.IsInf(f, -1):
			seen |= negInf
		}
		sum += v
	}
	return sum / F(len(values)), seen
}

// resolveMean returns the mean implied by the non-finite values recorded
// in seen, falling back to the naive mean when the input had none.
//
// The naive mean cannot be trusted once an infinity is present, because
// finite values may overflow to the opposite infinity before the real one
// is added: MaxFloat64 + MaxFloat64 + -Inf evaluates to NaN, not -Inf.
func resolveMean[F ~float64](naive F, seen infinities) F {
	const bothInf = negInf | posInf
	switch {
	case seen&sawNaN != 0, seen&bothInf == bothInf:
		return F(math.NaN())
	case seen&posInf != 0:
		return F(math.Inf(1))
	case seen&negInf != 0:
		return F(math.Inf(-1))
	}
	return naive
}

// infinities is a bitset that records the presence of non-finite
// values (±Inf and NaN) in the input.
type infinities uint8

const (
	negInf infinities = 1 << iota // input contains -Inf
	posInf                        // input contains +Inf
	sawNaN                        // input contains NaN
)

// Median returns the median of the values in values,
// which is the same as the 0.5 quantile reported by [Quantiles].
//
// Median does not modify the slice.
//
// When the slice is already sorted, Median may run asymptotically
// faster and allocate asymptotically less.
//
// Median panics if values is an empty slice.
// If values contains NaN, Median returns NaN.
// For ordering purposes -Inf is smaller than every other value,
// Inf is larger than every other value, and
// -0.0 is smaller than 0.0.
func Median[F ~float64](values []F) F {
	// Quantiles yields one result per requested quantile, so asking
	// for a single quantile gives a one-element slice.
	halves := Quantiles(values, 0.5)
	return halves[0]
}

// Quantiles returns a sequence of quantiles of values.
//
// The returned slice has the same length as the quantiles slice,
// and the elements are one-to-one with the input quantiles.
// A quantile of 0 corresponds to the minimum value in values and
// a quantile of 1 corresponds to the maximum value in values.
// A quantile of 0.5 is the same as the value returned by [Median].
//
// Quantiles does not modify the slice.
//
// Quantiles may perform asymptotically faster and allocate
// asymptotically less if the slice is already sorted.
//
// Quantiles panics if values is an empty slice or any
// quantile is not contained in the interval [0, 1]
// (a NaN quantile is not contained in the interval).
// The quantiles are validated before values is examined,
// so an invalid quantile panics even if values contains NaN.
//
// If values contains NaN, it returns [NaN, ..., NaN].
// -Inf is treated as smaller than all other values,
// Inf is treated as larger than all other values, and
// -0.0 is treated as smaller than 0.0.
func Quantiles[F ~float64](values []F, quantiles ...F) []F {
	if len(values) == 0 {
		panic("quantiles: empty slice")
	}
	// Validate every request up front so that an invalid quantile is
	// reported regardless of the data, and before paying for a sort.
	for _, q := range quantiles {
		// Written as a negated conjunction so that a NaN quantile,
		// which fails every comparison, is rejected as well.
		if !(0 <= q && q <= 1) {
			panic("quantile must be contained in the interval [0, 1]")
		}
	}
	res := make([]F, len(quantiles))
	if len(res) == 0 {
		// Nothing was requested; avoid cloning and sorting values.
		return res
	}
	if !slices.IsSorted(values) {
		// Sort a private copy so the caller's slice is left untouched.
		values = slices.Clone(values)
		slices.Sort(values)
	}
	// The slices package orders NaN before every other value, so if
	// values contains any NaN, one of them is now the first element.
	if math.IsNaN(float64(values[0])) {
		for i := range res {
			res[i] = F(math.NaN())
		}
		return res
	}
	for i, q := range quantiles {
		// There are many methods for computing quantiles. Quantiles uses the
		// "inclusive" method, also known as Q7 in Hyndman and Fan, or the
		// "linear" or "R-7" method. This assumes that the data is either a
		// population or a sample that includes the most extreme values of the
		// underlying population.
		res[i] = hyndmanFanR7(values, q)
	}
	return res
}

// hyndmanFanR7 implements the Hyndman and Fan "R-7"
// method of computing interpolated quantile values
// over a sorted slice of values.
//
// values must be non-empty and sorted in ascending order, and q must
// lie in the interval [0, 1]; hyndmanFanR7 checks neither.
//
// When the quantile falls exactly on an element, or between two equal
// elements, that element is returned as is, so infinite elements are
// reported as ±Inf rather than NaN. A quantile that falls strictly
// between -Inf and a value other than +Inf is -Inf, one that falls
// strictly between a finite value and +Inf is +Inf, and one that falls
// strictly between -Inf and +Inf is NaN.
//
// hyndmanFanR7 does not modify the slice.
func hyndmanFanR7[F ~float64](values []F, q F) F {
	// R-7 assigns quantile q the 1-based rank h = (n-1)q + 1, which
	// corresponds to the 0-based fractional index pos = (n-1)q.
	pos := F(len(values)-1) * q
	lo, hi := floor(pos), ceil(pos)
	lower, upper := values[lo], values[hi]
	switch {
	case lo == hi, lower == upper:
		// No interpolation is needed. Evaluating the general formula
		// here would compute 0*(Inf-Inf) = NaN for infinite elements.
		return lower
	case math.IsInf(float64(lower), -1) && upper < F(math.Inf(1)):
		// The general formula would compute -Inf + Inf = NaN.
		return lower
	}
	// Linear interpolation between the two neighbouring elements.
	return lower + (pos-F(lo))*(upper-lower)
}

// ceil returns the result of [math.Ceil] applied to n, converted to int.
func ceil[F ~float64](n F) int { return int(math.Ceil(float64(n))) }

// floor returns the result of [math.Floor] applied to n, converted to int.
func floor[F ~float64](n F) int { return int(math.Floor(float64(n))) }