2016-07-31 22:41:04 -07:00
|
|
|
// Package ostat is a go package that implements the efficient, accurate, and
|
|
|
|
// stable calculation of online statistical quantities. The algorithm comes
|
|
|
|
// from *The Art of Computer Programming*, vol 2 by Knuth.
|
2013-12-28 23:56:48 -08:00
|
|
|
package ostat
|
|
|
|
|
|
|
|
import (
|
2016-07-31 22:14:16 -07:00
|
|
|
"encoding/json"
|
2014-01-01 12:00:15 -08:00
|
|
|
"fmt"
|
2013-12-28 23:56:48 -08:00
|
|
|
"math"
|
|
|
|
)
|
|
|
|
|
|
|
|
// from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
|
|
|
|
|
2014-01-01 16:04:33 -08:00
|
|
|
// Population and Sample select which kind of statistic an OnlineStat
// computes. One of them is stored in the OnlineStat when it is created.
const (
	// Population marks the values as a complete statistical population.
	// See http://en.wikipedia.org/wiki/Statistical_population
	Population = iota

	// Sample marks the values as a sample taken from a larger population.
	// See http://en.wikipedia.org/wiki/Sample_(statistics)
	Sample
)
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// OnlineStat keeps track of online statistics: it holds a running count,
// mean, spread and the extremes of the values it has been given, without
// storing the values themselves.
type OnlineStat struct {
	// n is the number of values accounted for.
	n uint64

	// mean is the running mean; m2 is the running sum of squared deviations
	// from the mean, from which the variance is derived.
	mean, m2 float64

	// Min and Max are the smallest and largest values seen so far.
	Min, Max float64

	// typ is Population or Sample.
	typ uint64
}
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// NewSampleStat returns a ready-to-use OnlineStat for calculating sample
|
|
|
|
// statistics.
|
|
|
|
//
|
|
|
|
// For the distinction between NewSampleStat and NewPopulationStat please refer to https://en.wikipedia.org/wiki/Statistical_population
|
2013-12-29 00:48:20 -08:00
|
|
|
func NewSampleStat() *OnlineStat {
|
2013-12-28 23:56:48 -08:00
|
|
|
return &OnlineStat{
|
|
|
|
Min: math.Inf(1),
|
|
|
|
Max: math.Inf(-1),
|
2014-01-01 12:00:15 -08:00
|
|
|
typ: Sample,
|
2013-12-29 00:48:20 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// NewPopulationStat returns a ready-to-use OnlineStat for calculating
|
|
|
|
// population statistics.
|
|
|
|
//
|
|
|
|
// For the distinction between NewSampleStat and NewPopulationStat please refer to https://en.wikipedia.org/wiki/Statistical_population
|
2013-12-29 00:48:20 -08:00
|
|
|
func NewPopulationStat() *OnlineStat {
|
|
|
|
return &OnlineStat{
|
|
|
|
Min: math.Inf(1),
|
|
|
|
Max: math.Inf(-1),
|
2014-01-01 12:00:15 -08:00
|
|
|
typ: Population,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// MidStreamStat populates an OnlineStat such that it can pick up where
|
|
|
|
// a previous one left off.
|
|
|
|
//
|
2014-01-01 16:04:33 -08:00
|
|
|
// Let's say you have already stored some values and want to start an
|
|
|
|
// OnlineStat mid-stream; This is the function for you! Just provide it with
|
|
|
|
// the data in the sinature, and you'll get a properly initialized OnlineStat.
|
|
|
|
// N.b. the typ is either ostat.Population or ostat.Sample
|
2014-01-01 12:00:15 -08:00
|
|
|
func MidStreamStat(n uint64, mean, stddev, min, max float64, typ uint64) *OnlineStat {
|
|
|
|
return &OnlineStat{
|
|
|
|
n: n,
|
|
|
|
mean: mean,
|
|
|
|
m2: stddev * stddev * float64(n),
|
|
|
|
Min: min,
|
|
|
|
Max: max,
|
|
|
|
typ: typ,
|
2013-12-28 23:56:48 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// Push is how you feed new values into an OnlineStat.
|
2013-12-28 23:56:48 -08:00
|
|
|
func (os *OnlineStat) Push(v float64) {
|
2016-07-31 22:41:04 -07:00
|
|
|
os.n++
|
2013-12-28 23:56:48 -08:00
|
|
|
if v < os.Min {
|
|
|
|
os.Min = v
|
|
|
|
}
|
|
|
|
if v > os.Max {
|
|
|
|
os.Max = v
|
|
|
|
}
|
|
|
|
|
|
|
|
delta := v - os.mean
|
|
|
|
os.mean = os.mean + delta/float64(os.n)
|
|
|
|
os.m2 = os.m2 + delta*(v-os.mean)
|
|
|
|
}
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// Mean as defined by http://en.wikipedia.org/wiki/Expected_value.
|
2014-01-01 10:46:34 -08:00
|
|
|
func (os *OnlineStat) Mean() float64 {
|
2013-12-28 23:56:48 -08:00
|
|
|
if os.n == 0 {
|
2014-01-01 10:46:34 -08:00
|
|
|
return 0.0
|
2013-12-28 23:56:48 -08:00
|
|
|
}
|
2014-01-01 10:46:34 -08:00
|
|
|
return os.mean
|
2013-12-28 23:56:48 -08:00
|
|
|
}
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// Variance as defined by http://en.wikipedia.org/wiki/Variance
|
2014-01-01 10:46:34 -08:00
|
|
|
func (os *OnlineStat) Variance() float64 {
|
2013-12-28 23:56:48 -08:00
|
|
|
if os.n == 0 {
|
2014-01-01 10:46:34 -08:00
|
|
|
return 0.0
|
2013-12-28 23:56:48 -08:00
|
|
|
}
|
2014-01-01 10:46:34 -08:00
|
|
|
return os.m2 / float64(os.n-os.typ)
|
2013-12-28 23:56:48 -08:00
|
|
|
}
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// StdDev is the standard deviation as defined by
|
2014-01-01 16:04:33 -08:00
|
|
|
// http://en.wikipedia.org/wiki/Variance
|
2014-01-01 10:46:34 -08:00
|
|
|
func (os *OnlineStat) StdDev() float64 {
|
|
|
|
return math.Sqrt(os.Variance())
|
2013-12-28 23:56:48 -08:00
|
|
|
}
|
2014-01-01 12:00:15 -08:00
|
|
|
|
2014-11-23 00:29:35 -08:00
|
|
|
// CI returns a 95% confidence interval
|
|
|
|
// https://en.wikipedia.org/wiki/Confidence_interval
|
2014-11-23 00:27:59 -08:00
|
|
|
func (os *OnlineStat) CI() (float64, float64) {
|
|
|
|
// 95% from http://mathworld.wolfram.com/ConfidenceInterval.html
|
|
|
|
conf := 1.95996
|
|
|
|
dev := os.StdDev() / math.Sqrt(float64(os.n))
|
|
|
|
return os.mean - dev*conf, os.mean + dev*conf
|
|
|
|
}
|
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// N returns how many values have been Pushed into an OnlineStat.
|
2014-11-24 23:06:31 -08:00
|
|
|
func (os *OnlineStat) N() uint64 {
|
|
|
|
return os.n
|
|
|
|
}
|
|
|
|
|
2014-01-01 12:00:15 -08:00
|
|
|
func (os *OnlineStat) String() string {
|
|
|
|
return fmt.Sprintf(
|
|
|
|
"%+v",
|
|
|
|
struct {
|
|
|
|
n uint64
|
|
|
|
min float64
|
|
|
|
max float64
|
|
|
|
mean float64
|
|
|
|
variance float64
|
|
|
|
stdDev float64
|
|
|
|
}{
|
|
|
|
n: os.n,
|
|
|
|
min: os.Min,
|
|
|
|
max: os.Max,
|
|
|
|
mean: os.Mean(),
|
|
|
|
variance: os.Variance(),
|
|
|
|
stdDev: os.StdDev(),
|
|
|
|
},
|
|
|
|
)
|
|
|
|
}
|
2016-07-31 22:14:16 -07:00
|
|
|
|
2016-07-31 22:41:04 -07:00
|
|
|
// MarshalJSON is implemented for convenient encoding to json.
|
2016-07-31 22:14:16 -07:00
|
|
|
func (os *OnlineStat) MarshalJSON() ([]byte, error) {
|
|
|
|
s := struct {
|
|
|
|
N uint64 `json:"n"`
|
|
|
|
Min float64 `json:"min"`
|
|
|
|
Max float64 `json:"max"`
|
|
|
|
Mean float64 `json:"mean"`
|
|
|
|
Variance float64 `json:"variance"`
|
|
|
|
StdDev float64 `json:"std_dev"`
|
|
|
|
}{
|
|
|
|
N: os.n,
|
|
|
|
Min: os.Min,
|
|
|
|
Max: os.Max,
|
|
|
|
Mean: os.Mean(),
|
|
|
|
Variance: os.Variance(),
|
|
|
|
StdDev: os.StdDev(),
|
|
|
|
}
|
|
|
|
return json.Marshal(s)
|
|
|
|
}
|