1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
|
package gohistogram
// Copyright (c) 2013 VividCortex, Inc. All rights reserved.
// Please see the LICENSE file for applicable license terms.
import (
"fmt"
)
type NumericHistogram struct {
bins []bin
maxbins int
total uint64
}
// NewHistogram returns a new NumericHistogram with a maximum of n bins.
//
// There is no "optimal" bin count, but somewhere between 20 and 80 bins
// should be sufficient.
func NewHistogram(n int) *NumericHistogram {
return &NumericHistogram{
bins: make([]bin, 0),
maxbins: n,
total: 0,
}
}
func (h *NumericHistogram) Add(n float64) {
defer h.trim()
h.total++
for i := range h.bins {
if h.bins[i].value == n {
h.bins[i].count++
return
}
if h.bins[i].value > n {
newbin := bin{value: n, count: 1}
head := append(make([]bin, 0), h.bins[0:i]...)
head = append(head, newbin)
tail := h.bins[i:]
h.bins = append(head, tail...)
return
}
}
h.bins = append(h.bins, bin{count: 1, value: n})
}
func (h *NumericHistogram) Quantile(q float64) float64 {
count := q * float64(h.total)
for i := range h.bins {
count -= float64(h.bins[i].count)
if count <= 0 {
return h.bins[i].value
}
}
return -1
}
// CDF returns the value of the cumulative distribution function
// at x
func (h *NumericHistogram) CDF(x float64) float64 {
count := 0.0
for i := range h.bins {
if h.bins[i].value <= x {
count += float64(h.bins[i].count)
}
}
return count / float64(h.total)
}
// Mean returns the sample mean of the distribution
func (h *NumericHistogram) Mean() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
for i := range h.bins {
sum += h.bins[i].value * h.bins[i].count
}
return sum / float64(h.total)
}
// Variance returns the variance of the distribution
func (h *NumericHistogram) Variance() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
mean := h.Mean()
for i := range h.bins {
sum += (h.bins[i].count * (h.bins[i].value - mean) * (h.bins[i].value - mean))
}
return sum / float64(h.total)
}
func (h *NumericHistogram) Count() float64 {
return float64(h.total)
}
// trim merges adjacent bins to decrease the bin count to the maximum value
func (h *NumericHistogram) trim() {
for len(h.bins) > h.maxbins {
// Find closest bins in terms of value
minDelta := 1e99
minDeltaIndex := 0
for i := range h.bins {
if i == 0 {
continue
}
if delta := h.bins[i].value - h.bins[i-1].value; delta < minDelta {
minDelta = delta
minDeltaIndex = i
}
}
// We need to merge bins minDeltaIndex-1 and minDeltaIndex
totalCount := h.bins[minDeltaIndex-1].count + h.bins[minDeltaIndex].count
mergedbin := bin{
value: (h.bins[minDeltaIndex-1].value*
h.bins[minDeltaIndex-1].count +
h.bins[minDeltaIndex].value*
h.bins[minDeltaIndex].count) /
totalCount, // weighted average
count: totalCount, // summed heights
}
head := append(make([]bin, 0), h.bins[0:minDeltaIndex-1]...)
tail := append([]bin{mergedbin}, h.bins[minDeltaIndex+1:]...)
h.bins = append(head, tail...)
}
}
// String returns a string reprentation of the histogram,
// which is useful for printing to a terminal.
func (h *NumericHistogram) String() (str string) {
str += fmt.Sprintln("Total:", h.total)
for i := range h.bins {
var bar string
for j := 0; j < int(float64(h.bins[i].count)/float64(h.total)*200); j++ {
bar += "."
}
str += fmt.Sprintln(h.bins[i].value, "\t", bar)
}
return
}
|