1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
|
package dstream
import (
"math"
)
// Stats holds the summary statistics reported by Describe for a single
// float64 variable of a Dstream.
type Stats struct {

	// Mean is the arithmetic mean of the values counted in N.
	Mean float64

	// Min is the smallest value observed.
	Min float64

	// Max is the largest value observed.
	Max float64

	// SD is the standard deviation of the values, computed with N as
	// the divisor.
	SD float64

	// N is the count of values that are neither NaN nor infinite.
	N int

	// NaN is the count of NaN values.
	NaN int

	// Inf is the count of infinite values, of either sign.
	Inf int
}
// Describe computes summary statistics for the float64 columns of a dstream.
//
// The stream is reset and read twice: the first pass counts the values and
// accumulates the sum, minimum and maximum; the second pass accumulates the
// squared deviations from the mean. NaN and infinite values are tallied in
// Stats.NaN and Stats.Inf and are excluded from N, Mean, Min, Max and SD.
//
// The returned map has one entry per variable, keyed by variable name.
// Variables whose chunks are not []float64 are skipped while reading, so
// their entry has zero counts. For any variable with no finite values
// (N == 0), Mean and SD are NaN and Min and Max are zero.
//
// SD is the population standard deviation (the divisor is N, not N-1).
func Describe(data Dstream) map[string]Stats {

	data.Reset()
	p := data.NumVar()
	stats := make([]Stats, p)

	// First pass: counts, sum, min and max.
	for data.Next() {
		for j := 0; j < p; j++ {
			x, ok := data.GetPos(j).([]float64)
			if !ok {
				continue
			}

			st := &stats[j]
			for _, y := range x {
				switch {
				case math.IsNaN(y):
					st.NaN++
					continue
				case math.IsInf(y, 0):
					st.Inf++
					continue
				}

				st.N++
				st.Mean += y

				// Seed Min/Max from the first finite value of the
				// variable, wherever in the stream it occurs. Seeding
				// by position (first element of the first chunk) fails
				// when that element is NaN/Inf or the chunk is empty.
				if st.N == 1 || y < st.Min {
					st.Min = y
				}
				if st.N == 1 || y > st.Max {
					st.Max = y
				}
			}
		}
	}

	// Convert sum to mean. With N == 0 this is 0/0, which yields NaN.
	for j := range stats {
		stats[j].Mean /= float64(stats[j].N)
	}

	// Second pass: sum of squared deviations from the mean.
	data.Reset()
	for data.Next() {
		for j := 0; j < p; j++ {
			x, ok := data.GetPos(j).([]float64)
			if !ok {
				continue
			}

			st := &stats[j]
			for _, y := range x {
				// Skip the same values that were excluded from the
				// mean; otherwise a single NaN/Inf poisons the SD.
				if math.IsNaN(y) || math.IsInf(y, 0) {
					continue
				}
				d := y - st.Mean
				st.SD += d * d
			}
		}
	}

	// Convert sum of squares to SD.
	for j := range stats {
		stats[j].SD = math.Sqrt(stats[j].SD / float64(stats[j].N))
	}

	// Put the statistics into a map indexed by variable names.
	names := data.Names()
	stm := make(map[string]Stats, p)
	for j := 0; j < p; j++ {
		stm[names[j]] = stats[j]
	}

	return stm
}
|