sm
/
ostat
1
0
Fork 0
ostat/ostat.go

166 linhas
4.0 KiB
Go

// Package ostat implements the efficient, accurate, and stable calculation of
// online statistical quantities. The algorithm comes from
// *The Art of Computer Programming*, vol. 2, by Knuth.
package ostat
import (
"encoding/json"
"fmt"
"math"
)
// Kinds of statistic an OnlineStat can compute. One of these is stored in the
// OnlineStat when it is constructed. Variance subtracts the stored value from
// the number of observations to form its divisor, so the numeric values are
// significant and are spelled out explicitly.
//
// The running-variance algorithm is described at
// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
const (
	// Population selects population statistics (divisor n); see
	// http://en.wikipedia.org/wiki/Statistical_population
	Population = 0
	// Sample selects sample statistics (divisor n-1); see
	// http://en.wikipedia.org/wiki/Sample_(statistics)
	Sample = 1
)
// OnlineStat accumulates summary statistics over a stream of values without
// storing the values themselves.
type OnlineStat struct {
	// n is the number of values recorded so far.
	n uint64

	// mean is the running mean; m2 is the running sum of squared deviations
	// from the mean, from which the variance is derived.
	mean, m2 float64

	// Min and Max are the smallest and largest values recorded so far.
	Min, Max float64

	// typ is Population or Sample; Variance subtracts it from n.
	typ uint64
}
// NewSampleStat returns an empty OnlineStat configured for sample statistics.
//
// Min starts at +Inf and Max at -Inf, so the first finite value pushed
// replaces both.
//
// For the distinction between NewSampleStat and NewPopulationStat please refer
// to https://en.wikipedia.org/wiki/Statistical_population
func NewSampleStat() *OnlineStat {
	stat := new(OnlineStat)
	stat.typ = Sample
	stat.Min, stat.Max = math.Inf(1), math.Inf(-1)
	return stat
}
// NewPopulationStat returns an empty OnlineStat configured for population
// statistics.
//
// Min starts at +Inf and Max at -Inf, so the first finite value pushed
// replaces both.
//
// For the distinction between NewSampleStat and NewPopulationStat please refer
// to https://en.wikipedia.org/wiki/Statistical_population
func NewPopulationStat() *OnlineStat {
	stat := new(OnlineStat)
	stat.typ = Population
	stat.Min, stat.Max = math.Inf(1), math.Inf(-1)
	return stat
}
// MidStreamStat populates an OnlineStat such that it can pick up where
// a previous one left off.
//
// Let's say you have already stored some values and want to start an
// OnlineStat mid-stream; this is the function for you! Just provide it with
// the data in the signature, and you'll get a properly initialized OnlineStat.
// N.b. the typ is either ostat.Population or ostat.Sample.
//
// stddev is interpreted as the standard deviation of the kind named by typ.
// The internal sum of squared deviations is rebuilt as stddev² × (n − typ),
// the inverse of the division Variance performs, so StdDev on the returned
// value reproduces stddev. When n <= typ no spread can be recovered and the
// sum is seeded as zero; this also avoids the unsigned wrap-around of n − typ.
func MidStreamStat(n uint64, mean, stddev, min, max float64, typ uint64) *OnlineStat {
	var m2 float64
	if n > typ {
		m2 = stddev * stddev * float64(n-typ)
	}
	return &OnlineStat{
		n:    n,
		mean: mean,
		m2:   m2,
		Min:  min,
		Max:  max,
		typ:  typ,
	}
}
// Push folds the value v into the running statistics: it increments the
// count, updates the running mean and sum of squared deviations, and widens
// Min/Max when v falls outside them.
func (os *OnlineStat) Push(v float64) {
	os.n++

	// Welford's update: the first deviation is measured against the mean
	// before v is folded in, and the m2 increment multiplies it by the
	// deviation from the mean after.
	before := v - os.mean
	os.mean += before / float64(os.n)
	os.m2 += before * (v - os.mean)

	// Two independent checks: the first value pushed into a fresh stat is
	// both the new minimum and the new maximum.
	if v < os.Min {
		os.Min = v
	}
	if v > os.Max {
		os.Max = v
	}
}
// Mean returns the running mean of the recorded values, or 0 when no values
// have been recorded. See http://en.wikipedia.org/wiki/Expected_value.
func (os *OnlineStat) Mean() float64 {
	if os.n > 0 {
		return os.mean
	}
	return 0
}
// Variance returns the variance of the recorded values as defined by
// http://en.wikipedia.org/wiki/Variance: the running sum of squared
// deviations divided by n for a Population stat, or by n-1 for a Sample stat.
//
// It returns 0 when there are too few values for the divisor to be positive
// (no values at all, or a single value in a Sample stat). Without that guard
// the division would be 0/0 (NaN), and the unsigned subtraction would wrap
// around if the stored kind ever exceeded n.
func (os *OnlineStat) Variance() float64 {
	if os.n <= os.typ {
		return 0
	}
	return os.m2 / float64(os.n-os.typ)
}
// StdDev returns the standard deviation, computed as the square root of
// Variance. See https://en.wikipedia.org/wiki/Standard_deviation
func (os *OnlineStat) StdDev() float64 {
	variance := os.Variance()
	return math.Sqrt(variance)
}
// CI returns the lower and upper bounds of a 95% confidence interval around
// the running mean: mean ± 1.95996 × StdDev/√n.
// https://en.wikipedia.org/wiki/Confidence_interval
//
// The stored mean is used directly rather than Mean. When no values have been
// recorded the standard error is 0/0, so both bounds are NaN.
func (os *OnlineStat) CI() (float64, float64) {
	// Multiplier for a 95% interval, from
	// http://mathworld.wolfram.com/ConfidenceInterval.html
	const z95 = 1.95996

	stdErr := os.StdDev() / math.Sqrt(float64(os.n))
	margin := stdErr * z95
	return os.mean - margin, os.mean + margin
}
// N returns the number of values recorded by the OnlineStat: one per call to
// Push, plus any count it was seeded with through MidStreamStat.
func (os *OnlineStat) N() uint64 { return os.n }
// String renders the current statistics as a brace-delimited list of
// name:value pairs (n, min, max, mean, variance, stdDev), implementing
// fmt.Stringer.
func (os *OnlineStat) String() string {
	// The field names of this local type are what %+v prints, so they define
	// the labels in the output.
	type summary struct {
		n                                uint64
		min, max, mean, variance, stdDev float64
	}

	view := summary{
		n:        os.n,
		min:      os.Min,
		max:      os.Max,
		mean:     os.Mean(),
		variance: os.Variance(),
		stdDev:   os.StdDev(),
	}
	return fmt.Sprintf("%+v", view)
}
// MarshalJSON implements json.Marshaler. It encodes a snapshot of the current
// statistics as an object with the keys n, min, max, mean, variance and
// std_dev.
//
// encoding/json cannot represent NaN or ±Inf, so an error is returned while
// any of those values is non-finite, for example when Min and Max still hold
// the infinities set by the constructors.
func (os *OnlineStat) MarshalJSON() ([]byte, error) {
	type snapshot struct {
		N        uint64  `json:"n"`
		Min      float64 `json:"min"`
		Max      float64 `json:"max"`
		Mean     float64 `json:"mean"`
		Variance float64 `json:"variance"`
		StdDev   float64 `json:"std_dev"`
	}

	return json.Marshal(snapshot{
		N:        os.n,
		Min:      os.Min,
		Max:      os.Max,
		Mean:     os.Mean(),
		Variance: os.Variance(),
		StdDev:   os.StdDev(),
	})
}