ostat/ostat.go

131 lines
2.8 KiB
Go
Raw Normal View History

2014-01-01 16:18:55 -08:00
// Package ostat implements the efficient, accurate, and stable calculation of
// online statistical quantities. The algorithm comes from *The Art of Computer
// Programming*, vol. 2 by Knuth.
2013-12-28 23:56:48 -08:00
package ostat
import (
2014-01-01 12:00:15 -08:00
"fmt"
2013-12-28 23:56:48 -08:00
"math"
)
// from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
2014-01-01 16:04:33 -08:00
// Population and Sample select which variance denominator an OnlineStat uses.
// One of them is passed as the typ argument of MidStreamStat; NewPopulationStat
// and NewSampleStat set the matching value themselves.
const (
	// Population treats the observed values as the whole population: its
	// value (0) is subtracted from n, so the variance divides by n.
	// http://en.wikipedia.org/wiki/Statistical_population
	Population = iota
	// Sample treats the observed values as a sample of a larger population:
	// its value (1) is subtracted from n, so the variance divides by n-1.
	// http://en.wikipedia.org/wiki/Sample_(statistics)
	Sample
)
2013-12-28 23:56:48 -08:00
// OnlineStat accumulates running statistics over a stream of float64 values
// without storing the values themselves. Min and Max are exported and may be
// read directly; the remaining state is reached through the accessor methods.
type OnlineStat struct {
	n    uint64  // number of values observed so far
	mean float64 // running mean of the observed values
	m2   float64 // running sum of squared deviations from the mean
	Min  float64 // smallest value observed
	Max  float64 // largest value observed
	typ  uint64  // Population or Sample; subtracted from n in the variance denominator
}
func NewSampleStat() *OnlineStat {
2013-12-28 23:56:48 -08:00
return &OnlineStat{
Min: math.Inf(1),
Max: math.Inf(-1),
2014-01-01 12:00:15 -08:00
typ: Sample,
}
}
func NewPopulationStat() *OnlineStat {
return &OnlineStat{
Min: math.Inf(1),
Max: math.Inf(-1),
2014-01-01 12:00:15 -08:00
typ: Population,
}
}
2014-01-01 16:04:33 -08:00
// Let's say you have already stored some values and want to start an
// OnlineStat mid-stream; This is the function for you! Just provide it with
// the data in the sinature, and you'll get a properly initialized OnlineStat.
// N.b. the typ is either ostat.Population or ostat.Sample
2014-01-01 12:00:15 -08:00
func MidStreamStat(n uint64, mean, stddev, min, max float64, typ uint64) *OnlineStat {
return &OnlineStat{
n: n,
mean: mean,
m2: stddev * stddev * float64(n),
Min: min,
Max: max,
typ: typ,
2013-12-28 23:56:48 -08:00
}
}
2014-01-01 16:04:33 -08:00
// this is how you feed new values into your OnlineStat.
2013-12-28 23:56:48 -08:00
func (os *OnlineStat) Push(v float64) {
os.n += 1
if v < os.Min {
os.Min = v
}
if v > os.Max {
os.Max = v
}
delta := v - os.mean
os.mean = os.mean + delta/float64(os.n)
os.m2 = os.m2 + delta*(v-os.mean)
}
2014-01-01 16:04:33 -08:00
// http://en.wikipedia.org/wiki/Expected_value
func (os *OnlineStat) Mean() float64 {
2013-12-28 23:56:48 -08:00
if os.n == 0 {
return 0.0
2013-12-28 23:56:48 -08:00
}
return os.mean
2013-12-28 23:56:48 -08:00
}
2014-01-01 16:04:33 -08:00
// http://en.wikipedia.org/wiki/Variance
func (os *OnlineStat) Variance() float64 {
2013-12-28 23:56:48 -08:00
if os.n == 0 {
return 0.0
2013-12-28 23:56:48 -08:00
}
return os.m2 / float64(os.n-os.typ)
2013-12-28 23:56:48 -08:00
}
2014-01-01 16:04:33 -08:00
// http://en.wikipedia.org/wiki/Variance
func (os *OnlineStat) StdDev() float64 {
return math.Sqrt(os.Variance())
2013-12-28 23:56:48 -08:00
}
2014-01-01 12:00:15 -08:00
2014-11-23 00:29:35 -08:00
// CI returns a 95% confidence interval
// https://en.wikipedia.org/wiki/Confidence_interval
2014-11-23 00:27:59 -08:00
func (os *OnlineStat) CI() (float64, float64) {
// 95% from http://mathworld.wolfram.com/ConfidenceInterval.html
conf := 1.95996
dev := os.StdDev() / math.Sqrt(float64(os.n))
return os.mean - dev*conf, os.mean + dev*conf
}
2014-11-24 23:06:31 -08:00
// N returns the number of values observed so far.
func (os *OnlineStat) N() uint64 {
return os.n
}
2014-01-01 12:00:15 -08:00
// String formats the count, Min, Max, mean, variance and standard deviation
// as a single "{name:value ...}" summary using the %+v verb.
func (os *OnlineStat) String() string {
	// Field names and their order determine the rendered text.
	var summary struct {
		n        uint64
		min      float64
		max      float64
		mean     float64
		variance float64
		stdDev   float64
	}
	summary.n = os.n
	summary.min = os.Min
	summary.max = os.Max
	summary.mean = os.Mean()
	summary.variance = os.Variance()
	summary.stdDev = os.StdDev()
	return fmt.Sprintf("%+v", summary)
}