1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
|
// Package gohistogram contains implementations of weighted and exponential histograms.
package gohistogram
// Copyright (c) 2013 VividCortex, Inc. All rights reserved.
// Please see the LICENSE file for applicable license terms.
import "fmt"
// A WeightedHistogram implements Histogram. A WeightedHistogram has bins that have values
// which are exponentially weighted moving averages. This allows you keep inserting large
// amounts of data into the histogram and approximate quantiles with recency factored in.
type WeightedHistogram struct {
bins []bin
maxbins int
total float64
alpha float64
}
// NewWeightedHistogram returns a new WeightedHistogram with a maximum of n bins with a decay factor
// of alpha.
//
// There is no "optimal" bin count, but somewhere between 20 and 80 bins should be
// sufficient.
//
// Alpha should be set to 2 / (N+1), where N represents the average age of the moving window.
// For example, a 60-second window with an average age of 30 seconds would yield an
// alpha of 0.064516129.
func NewWeightedHistogram(n int, alpha float64) *WeightedHistogram {
return &WeightedHistogram{
bins: make([]bin, 0),
maxbins: n,
total: 0,
alpha: alpha,
}
}
func ewma(existingVal float64, newVal float64, alpha float64) (result float64) {
result = newVal*(1-alpha) + existingVal*alpha
return
}
func (h *WeightedHistogram) scaleDown(except int) {
for i := range h.bins {
if i != except {
h.bins[i].count = ewma(h.bins[i].count, 0, h.alpha)
}
}
}
func (h *WeightedHistogram) Add(n float64) {
defer h.trim()
for i := range h.bins {
if h.bins[i].value == n {
h.bins[i].count++
defer h.scaleDown(i)
return
}
if h.bins[i].value > n {
newbin := bin{value: n, count: 1}
head := append(make([]bin, 0), h.bins[0:i]...)
head = append(head, newbin)
tail := h.bins[i:]
h.bins = append(head, tail...)
defer h.scaleDown(i)
return
}
}
h.bins = append(h.bins, bin{count: 1, value: n})
}
func (h *WeightedHistogram) Quantile(q float64) float64 {
count := q * h.total
for i := range h.bins {
count -= float64(h.bins[i].count)
if count <= 0 {
return h.bins[i].value
}
}
return -1
}
// CDF returns the value of the cumulative distribution function
// at x
func (h *WeightedHistogram) CDF(x float64) float64 {
count := 0.0
for i := range h.bins {
if h.bins[i].value <= x {
count += float64(h.bins[i].count)
}
}
return count / h.total
}
// Mean returns the sample mean of the distribution
func (h *WeightedHistogram) Mean() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
for i := range h.bins {
sum += h.bins[i].value * h.bins[i].count
}
return sum / h.total
}
// Variance returns the variance of the distribution
func (h *WeightedHistogram) Variance() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
mean := h.Mean()
for i := range h.bins {
sum += (h.bins[i].count * (h.bins[i].value - mean) * (h.bins[i].value - mean))
}
return sum / h.total
}
func (h *WeightedHistogram) Count() float64 {
return h.total
}
func (h *WeightedHistogram) trim() {
total := 0.0
for i := range h.bins {
total += h.bins[i].count
}
h.total = total
for len(h.bins) > h.maxbins {
// Find closest bins in terms of value
minDelta := 1e99
minDeltaIndex := 0
for i := range h.bins {
if i == 0 {
continue
}
if delta := h.bins[i].value - h.bins[i-1].value; delta < minDelta {
minDelta = delta
minDeltaIndex = i
}
}
// We need to merge bins minDeltaIndex-1 and minDeltaIndex
totalCount := h.bins[minDeltaIndex-1].count + h.bins[minDeltaIndex].count
mergedbin := bin{
value: (h.bins[minDeltaIndex-1].value*
h.bins[minDeltaIndex-1].count +
h.bins[minDeltaIndex].value*
h.bins[minDeltaIndex].count) /
totalCount, // weighted average
count: totalCount, // summed heights
}
head := append(make([]bin, 0), h.bins[0:minDeltaIndex-1]...)
tail := append([]bin{mergedbin}, h.bins[minDeltaIndex+1:]...)
h.bins = append(head, tail...)
}
}
// String returns a string reprentation of the histogram,
// which is useful for printing to a terminal.
func (h *WeightedHistogram) String() (str string) {
str += fmt.Sprintln("Total:", h.total)
for i := range h.bins {
var bar string
for j := 0; j < int(float64(h.bins[i].count)/float64(h.total)*200); j++ {
bar += "."
}
str += fmt.Sprintln(h.bins[i].value, "\t", bar)
}
return
}
|