ostat/ostat.go

166 lines
4.0 KiB
Go
Raw Permalink Normal View History

2016-07-31 22:41:04 -07:00
// Package ostat is a Go package that implements the efficient, accurate, and
// stable calculation of online statistical quantities. The algorithm comes
// from *The Art of Computer Programming*, vol. 2, by Knuth.
2013-12-28 23:56:48 -08:00
package ostat
import (
2016-07-31 22:14:16 -07:00
"encoding/json"
2014-01-01 12:00:15 -08:00
"fmt"
2013-12-28 23:56:48 -08:00
"math"
)
// from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
2014-01-01 16:04:33 -08:00
// Population and Sample are the two kinds of statistic an OnlineStat can
// compute; one of them is supplied as typ when an OnlineStat is initialized.
const (
	// Population treats the observed values as the whole population.
	// See http://en.wikipedia.org/wiki/Statistical_population
	Population = iota
	// Sample treats the observed values as a sample of a larger population.
	// See http://en.wikipedia.org/wiki/Sample_(statistics)
	Sample
)
2016-07-31 22:41:04 -07:00
// OnlineStat accumulates running statistics over a stream of float64 values
// without storing the values themselves.
type OnlineStat struct {
	n    uint64  // number of values accounted for so far
	mean float64 // running mean of those values
	m2   float64 // running sum of squared deviations from the mean
	Min  float64 // smallest value seen so far
	Max  float64 // largest value seen so far
	typ  uint64  // Population or Sample; subtracted from n in the variance divisor
}
2016-07-31 22:41:04 -07:00
// NewSampleStat returns a ready-to-use OnlineStat for calculating sample
// statistics.
//
// For the distinction between NewSampleStat and NewPopulationStat please refer to https://en.wikipedia.org/wiki/Statistical_population
func NewSampleStat() *OnlineStat {
2013-12-28 23:56:48 -08:00
return &OnlineStat{
Min: math.Inf(1),
Max: math.Inf(-1),
2014-01-01 12:00:15 -08:00
typ: Sample,
}
}
2016-07-31 22:41:04 -07:00
// NewPopulationStat returns a ready-to-use OnlineStat for calculating
// population statistics.
//
// For the distinction between NewSampleStat and NewPopulationStat please refer to https://en.wikipedia.org/wiki/Statistical_population
func NewPopulationStat() *OnlineStat {
return &OnlineStat{
Min: math.Inf(1),
Max: math.Inf(-1),
2014-01-01 12:00:15 -08:00
typ: Population,
}
}
2016-07-31 22:41:04 -07:00
// MidStreamStat populates an OnlineStat such that it can pick up where
// a previous one left off.
//
2014-01-01 16:04:33 -08:00
// Let's say you have already stored some values and want to start an
// OnlineStat mid-stream; This is the function for you! Just provide it with
// the data in the sinature, and you'll get a properly initialized OnlineStat.
// N.b. the typ is either ostat.Population or ostat.Sample
2014-01-01 12:00:15 -08:00
func MidStreamStat(n uint64, mean, stddev, min, max float64, typ uint64) *OnlineStat {
return &OnlineStat{
n: n,
mean: mean,
m2: stddev * stddev * float64(n),
Min: min,
Max: max,
typ: typ,
2013-12-28 23:56:48 -08:00
}
}
2016-07-31 22:41:04 -07:00
// Push is how you feed new values into an OnlineStat.
2013-12-28 23:56:48 -08:00
func (os *OnlineStat) Push(v float64) {
2016-07-31 22:41:04 -07:00
os.n++
2013-12-28 23:56:48 -08:00
if v < os.Min {
os.Min = v
}
if v > os.Max {
os.Max = v
}
delta := v - os.mean
os.mean = os.mean + delta/float64(os.n)
os.m2 = os.m2 + delta*(v-os.mean)
}
2016-07-31 22:41:04 -07:00
// Mean as defined by http://en.wikipedia.org/wiki/Expected_value.
func (os *OnlineStat) Mean() float64 {
2013-12-28 23:56:48 -08:00
if os.n == 0 {
return 0.0
2013-12-28 23:56:48 -08:00
}
return os.mean
2013-12-28 23:56:48 -08:00
}
2016-07-31 22:41:04 -07:00
// Variance as defined by http://en.wikipedia.org/wiki/Variance
func (os *OnlineStat) Variance() float64 {
2013-12-28 23:56:48 -08:00
if os.n == 0 {
return 0.0
2013-12-28 23:56:48 -08:00
}
return os.m2 / float64(os.n-os.typ)
2013-12-28 23:56:48 -08:00
}
2016-07-31 22:41:04 -07:00
// StdDev is the standard deviation as defined by
2014-01-01 16:04:33 -08:00
// http://en.wikipedia.org/wiki/Variance
func (os *OnlineStat) StdDev() float64 {
return math.Sqrt(os.Variance())
2013-12-28 23:56:48 -08:00
}
2014-01-01 12:00:15 -08:00
2014-11-23 00:29:35 -08:00
// CI returns a 95% confidence interval
// https://en.wikipedia.org/wiki/Confidence_interval
2014-11-23 00:27:59 -08:00
func (os *OnlineStat) CI() (float64, float64) {
// 95% from http://mathworld.wolfram.com/ConfidenceInterval.html
conf := 1.95996
dev := os.StdDev() / math.Sqrt(float64(os.n))
return os.mean - dev*conf, os.mean + dev*conf
}
2016-07-31 22:41:04 -07:00
// N returns how many values have been Pushed into an OnlineStat.
2014-11-24 23:06:31 -08:00
func (os *OnlineStat) N() uint64 {
return os.n
}
2014-01-01 12:00:15 -08:00
// String formats the count, min, max, mean, variance and standard deviation
// with fmt's %+v verb, i.e. as "{n:… min:… max:… mean:… variance:… stdDev:…}".
func (os *OnlineStat) String() string {
	summary := struct {
		n        uint64
		min      float64
		max      float64
		mean     float64
		variance float64
		stdDev   float64
	}{os.n, os.Min, os.Max, os.Mean(), os.Variance(), os.StdDev()}

	return fmt.Sprintf("%+v", summary)
}
2016-07-31 22:14:16 -07:00
2016-07-31 22:41:04 -07:00
// MarshalJSON is implemented for convenient encoding to json.
2016-07-31 22:14:16 -07:00
func (os *OnlineStat) MarshalJSON() ([]byte, error) {
s := struct {
N uint64 `json:"n"`
Min float64 `json:"min"`
Max float64 `json:"max"`
Mean float64 `json:"mean"`
Variance float64 `json:"variance"`
StdDev float64 `json:"std_dev"`
}{
N: os.n,
Min: os.Min,
Max: os.Max,
Mean: os.Mean(),
Variance: os.Variance(),
StdDev: os.StdDev(),
}
return json.Marshal(s)
}