// ostat is a go package that implements the efficient, accurate, and stable // calculation of online statistical quantities. The algorithm comes from *The // Art of Computer Programing*, vol 2 by Knuth package ostat import ( "fmt" "math" ) // from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm // These constants are used in initialization of the OnlineStat const ( // http://en.wikipedia.org/wiki/Statistical_population Population = iota // http://en.wikipedia.org/wiki/Sample_(statistics) Sample ) type OnlineStat struct { n uint64 mean float64 m2 float64 Min float64 Max float64 typ uint64 } func NewSampleStat() *OnlineStat { return &OnlineStat{ Min: math.Inf(1), Max: math.Inf(-1), typ: Sample, } } func NewPopulationStat() *OnlineStat { return &OnlineStat{ Min: math.Inf(1), Max: math.Inf(-1), typ: Population, } } // Let's say you have already stored some values and want to start an // OnlineStat mid-stream; This is the function for you! Just provide it with // the data in the sinature, and you'll get a properly initialized OnlineStat. // N.b. the typ is either ostat.Population or ostat.Sample func MidStreamStat(n uint64, mean, stddev, min, max float64, typ uint64) *OnlineStat { return &OnlineStat{ n: n, mean: mean, m2: stddev * stddev * float64(n), Min: min, Max: max, typ: typ, } } // this is how you feed new values into your OnlineStat. func (os *OnlineStat) Push(v float64) { os.n += 1 if v < os.Min { os.Min = v } if v > os.Max { os.Max = v } delta := v - os.mean os.mean = os.mean + delta/float64(os.n) os.m2 = os.m2 + delta*(v-os.mean) } // http://en.wikipedia.org/wiki/Expected_value func (os *OnlineStat) Mean() float64 { if os.n == 0 { return 0.0 } return os.mean } // http://en.wikipedia.org/wiki/Variance func (os *OnlineStat) Variance() float64 { if os.n == 0 { return 0.0 } return os.m2 / float64(os.n-os.typ) } // http://en.wikipedia.org/wiki/Variance func (os *OnlineStat) StdDev() float64 { return math.Sqrt(os.Variance()) } // CI returns a 95% confidence interval // https://en.wikipedia.org/wiki/Confidence_interval func (os *OnlineStat) CI() (float64, float64) { // 95% from http://mathworld.wolfram.com/ConfidenceInterval.html conf := 1.95996 dev := os.StdDev() / math.Sqrt(float64(os.n)) return os.mean - dev*conf, os.mean + dev*conf } func (os *OnlineStat) N() uint64 { return os.n } func (os *OnlineStat) String() string { return fmt.Sprintf( "%+v", struct { n uint64 min float64 max float64 mean float64 variance float64 stdDev float64 }{ n: os.n, min: os.Min, max: os.Max, mean: os.Mean(), variance: os.Variance(), stdDev: os.StdDev(), }, ) }