// Package ostat is a go package that implements the efficient, accurate, and // stable calculation of online statistical quantities. The algorithm comes // from *The Art of Computer Programing*, vol 2 by Knuth package ostat import ( "encoding/json" "fmt" "math" ) // from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm // These constants are used in initialization of the OnlineStat const ( // http://en.wikipedia.org/wiki/Statistical_population Population = iota // http://en.wikipedia.org/wiki/Sample_(statistics) Sample ) // OnlineStat keeps track of online statistics. type OnlineStat struct { n uint64 mean float64 m2 float64 Min float64 Max float64 typ uint64 } // NewSampleStat returns a ready-to-use OnlineStat for calculating sample // statistics. // // For the distinction between NewSampleStat and NewPopulationStat please refer to https://en.wikipedia.org/wiki/Statistical_population func NewSampleStat() *OnlineStat { return &OnlineStat{ Min: math.Inf(1), Max: math.Inf(-1), typ: Sample, } } // NewPopulationStat returns a ready-to-use OnlineStat for calculating // population statistics. // // For the distinction between NewSampleStat and NewPopulationStat please refer to https://en.wikipedia.org/wiki/Statistical_population func NewPopulationStat() *OnlineStat { return &OnlineStat{ Min: math.Inf(1), Max: math.Inf(-1), typ: Population, } } // MidStreamStat populates an OnlineStat such that it can pick up where // a previous one left off. // // Let's say you have already stored some values and want to start an // OnlineStat mid-stream; This is the function for you! Just provide it with // the data in the sinature, and you'll get a properly initialized OnlineStat. // N.b. the typ is either ostat.Population or ostat.Sample func MidStreamStat(n uint64, mean, stddev, min, max float64, typ uint64) *OnlineStat { return &OnlineStat{ n: n, mean: mean, m2: stddev * stddev * float64(n), Min: min, Max: max, typ: typ, } } // Push is how you feed new values into an OnlineStat. func (os *OnlineStat) Push(v float64) { os.n++ if v < os.Min { os.Min = v } if v > os.Max { os.Max = v } delta := v - os.mean os.mean = os.mean + delta/float64(os.n) os.m2 = os.m2 + delta*(v-os.mean) } // Mean as defined by http://en.wikipedia.org/wiki/Expected_value. func (os *OnlineStat) Mean() float64 { if os.n == 0 { return 0.0 } return os.mean } // Variance as defined by http://en.wikipedia.org/wiki/Variance func (os *OnlineStat) Variance() float64 { if os.n == 0 { return 0.0 } return os.m2 / float64(os.n-os.typ) } // StdDev is the standard deviation as defined by // http://en.wikipedia.org/wiki/Variance func (os *OnlineStat) StdDev() float64 { return math.Sqrt(os.Variance()) } // CI returns a 95% confidence interval // https://en.wikipedia.org/wiki/Confidence_interval func (os *OnlineStat) CI() (float64, float64) { // 95% from http://mathworld.wolfram.com/ConfidenceInterval.html conf := 1.95996 dev := os.StdDev() / math.Sqrt(float64(os.n)) return os.mean - dev*conf, os.mean + dev*conf } // N returns how many values have been Pushed into an OnlineStat. func (os *OnlineStat) N() uint64 { return os.n } func (os *OnlineStat) String() string { return fmt.Sprintf( "%+v", struct { n uint64 min float64 max float64 mean float64 variance float64 stdDev float64 }{ n: os.n, min: os.Min, max: os.Max, mean: os.Mean(), variance: os.Variance(), stdDev: os.StdDev(), }, ) } // MarshalJSON is implemented for convenient encoding to json. func (os *OnlineStat) MarshalJSON() ([]byte, error) { s := struct { N uint64 `json:"n"` Min float64 `json:"min"` Max float64 `json:"max"` Mean float64 `json:"mean"` Variance float64 `json:"variance"` StdDev float64 `json:"std_dev"` }{ N: os.n, Min: os.Min, Max: os.Max, Mean: os.Mean(), Variance: os.Variance(), StdDev: os.StdDev(), } return json.Marshal(s) }