Added distinction between population and sample statistics

This commit is contained in:
Stephen McQuay 2013-12-29 00:48:20 -08:00
parent e7e174530e
commit d2fef9128a
2 changed files with 124 additions and 24 deletions

View File

@ -13,12 +13,22 @@ type OnlineStat struct {
m2 float64
Max float64
Min float64
typ int64
}
func NewOnlineStat() *OnlineStat {
// NewSampleStat returns a new OnlineStat with typ set to 1. Variance
// subtracts typ from the count in its divisor, so this stat uses the
// n-1 (sample) form. Min starts at +Inf and Max at -Inf.
func NewSampleStat() *OnlineStat {
	s := new(OnlineStat)
	s.Min = math.Inf(1)
	s.Max = math.Inf(-1)
	s.typ = 1
	return s
}
// NewPopulationStat returns a new OnlineStat with typ set to 0. Variance
// subtracts typ from the count in its divisor, so this stat divides by n
// (population form). Min starts at +Inf and Max at -Inf.
func NewPopulationStat() *OnlineStat {
	var stat OnlineStat
	stat.typ = 0
	stat.Max = math.Inf(-1)
	stat.Min = math.Inf(1)
	return &stat
}
@ -47,7 +57,7 @@ func (os *OnlineStat) Variance() (float64, error) {
if os.n == 0 {
return 0.0, errors.New("no data")
}
return os.m2 / float64(os.n-1), nil
return os.m2 / float64(os.n-os.typ), nil
}
func (os *OnlineStat) StdDev() (float64, error) {

View File

@ -5,40 +5,130 @@ import (
"testing"
)
// tolerance is the largest absolute difference TestInsert accepts between a
// computed mean, variance or standard deviation and its expected value.
const tolerance = 1e-7
// TestInsert pushes each sample set into both a population stat and a sample
// stat, then checks min, max, mean, variance and standard deviation against
// precomputed values. Floating-point results are compared with an absolute
// error bound of tolerance.
func TestInsert(t *testing.T) {
// NOTE(review): the next four lines call NewOnlineStat and leave a range
// loop open; they look like the removed side of this diff hunk — confirm.
os := NewOnlineStat()
v := []float64{4, 7, 13, 16}
for _, i := range v {
os.Push(i)
// Table of cases. The p-prefixed expectations are compared against the
// population stat and the s-prefixed ones against the sample stat.
tests := []struct {
samples []float64
min float64
max float64
mean float64
pvariance, svariance float64
pstdev, sstdev float64
}{
// No samples: Min and Max are expected to remain +Inf and -Inf; every
// other expectation is the zero value.
{
min: math.Inf(1),
max: math.Inf(-1),
},
// Four small whole numbers.
{
samples: []float64{4, 7, 13, 16},
min: 4,
max: 16,
mean: 10.0,
pvariance: 22.5,
pstdev: math.Sqrt(22.5),
svariance: 30,
sstdev: math.Sqrt(30.0),
},
// The same four values shifted by 10e8 (i.e. 1e9); the expected variance
// and standard deviation are unchanged.
{
samples: []float64{10e8 + 4, 10e8 + 7, 10e8 + 13, 10e8 + 16},
min: 10e8 + 4,
max: 10e8 + 16,
mean: 10e8 + 10.0,
pvariance: 22.5,
pstdev: math.Sqrt(22.5),
svariance: 30,
sstdev: math.Sqrt(30.0),
},
// The same four values shifted by 10e9 (i.e. 1e10).
{
samples: []float64{10e9 + 4, 10e9 + 7, 10e9 + 13, 10e9 + 16},
min: 10e9 + 4,
max: 10e9 + 16,
mean: 10e9 + 10.0,
pvariance: 22.5,
pstdev: math.Sqrt(22.5),
svariance: 30,
sstdev: math.Sqrt(30.0),
},
// A longer list of positive and negative whole-number samples.
{
samples: []float64{119., 480., 900., -561., 664., -652., 549., -342., -754., 983., -485., 6., 572., 683., -111., 400., -179., 60., 142., 253., -330., -886., -120., 590., 465., -374., 299., -32., -794., -107., -531., -649., -877., 114., 179., 704., 508., -210., -128., 147., 654., -251., -337., 643., 865., 530., 535., 534., 528., -115., -645., 55., -584., 104., -556., 496., -863., 483., 145., 578., 318., -611., 290., 178., -25., -792., -45., 221., -172., 491., 911., 904., 523., 778., -484., 230., -897., -97., 316., -255., -749., -737., 709., -74., 48., 839., 428., -560., -613., -639., 371., 948., -966., -802., -618., -753., 835., -372., -492., 89.},
min: -966.0,
max: 983.0,
mean: 21.68,
pvariance: 293082.29760000011,
pstdev: 541.3707579838424,
svariance: 296042.72484848,
sstdev: 544.0980838493,
},
// A longer list of fractional samples.
{
samples: []float64{36.22727273, -8., -0.79775281, -1.14772727, 29.03333333, -19.53571429, 0.49090909, -0.41666667, -25.92, -10.77464789, 67.71428571, -39.28571429, 10.05154639, -46., 7.91891892, -9.92, -10.30769231, -11.20634921, 13.85, -9.19565217, -16.9, -2.725, 14.32142857, -18.64285714, 9.70238095, 5.92307692, 15.15789474, 8.22368421, 3.56179775, 5.87368421, -16.68, -104.57142857, 6.42352941, -5.14893617, -4.925, -13.31818182, 7.81538462, -5.01492537, -15.35483871, 7.08421053, -4.47777778, 7.97727273, 10.09574468, 10.56521739, 7.67777778, -57.5, 9.03773585, -2.0989011, 12.34482759, -85.5, -107., -4.89473684, 0.90217391, 13.2, 7.31111111, -1.98611111, -7.3375, 9.80722892, -5.88043478, -404., 47.33333333, 16.85416667, -3.06410256, -72.85714286, 29.08333333, 13.58333333, -11.84615385, -14.36363636, 10.25373134, 3.92207792, 11.14634146, -16.15151515, -10.16363636, 13.13513514, -4.55223881, 14.45, 44.88888889, 5.36781609, 1.29591837, 176.5, 18.95652174, 1.85416667, 8.4125, -168.33333333, 2.97590361, 5.45555556, 5.20930233, -7.11764706, 26.53846154, -16.94285714, 17.18181818, 32.86666667, -22.375, 10.14285714, -51.625, -20.72916667, 0.4516129, -5.47058824, -17.81818182, 9.},
min: -404.0,
max: 176.5,
mean: -6.5472287624739636,
pvariance: 2864.1893930503825,
pstdev: 53.518122099438266,
svariance: 2893.1205990296,
sstdev: 53.787736511491,
},
}
// NOTE(review): the exact-equality checks on os below, up to the start of
// the range over tests, look like the removed side of this diff hunk — confirm.
if os.Min != 4 {
t.Errorf("incorrectly calculated min")
}
if os.Max != 16 {
t.Errorf("incorrectly calculated max")
}
if m, _ := os.Mean(); m != 10.0 {
t.Errorf("incorrect mean")
}
if variance, _ := os.Variance(); variance != 30.0 {
t.Errorf("incorrect variance: %f", variance)
}
if stdev, _ := os.StdDev(); stdev != math.Sqrt(30.0) {
t.Errorf("incorrect stdev: %f", stdev)
for _, test := range tests {
// Feed the same samples to one stat of each kind.
ps := NewPopulationStat()
ss := NewSampleStat()
for _, i := range test.samples {
ps.Push(i)
ss.Push(i)
}
// The two stats are required to agree on Min and Max.
if ps.Min != ss.Min {
t.Errorf("Mins don't match")
}
if ps.Max != ss.Max {
t.Errorf("Maxs don't match")
}
// Min and Max are compared exactly, not within tolerance.
if ps.Min != test.min {
t.Errorf("incorrectly calculated min: %f != %f", ps.Min, test.min)
}
if ps.Max != test.max {
t.Errorf("incorrectly calculated max: %f != %f", ps.Max, test.max)
}
// Errors returned by Mean, Variance and StdDev are discarded from here on;
// only the returned values are checked.
pmean, _ := ps.Mean()
smean, _ := ss.Mean()
if pmean != smean {
t.Errorf("Means don't match")
}
if m, _ := ps.Mean(); math.Abs(m-test.mean) > tolerance {
t.Errorf("incorrect mean: %f != %f", m, test.mean)
}
// Population stat against the p-prefixed expectations.
if variance, _ := ps.Variance(); math.Abs(variance-test.pvariance) > tolerance {
t.Errorf("incorrect variance: %f != %f", variance, test.pvariance)
}
if stdev, _ := ps.StdDev(); math.Abs(stdev-test.pstdev) > tolerance {
t.Errorf("incorrect stdev: %f != %f", stdev, test.pstdev)
}
// Sample stat against the s-prefixed expectations.
if variance, _ := ss.Variance(); math.Abs(variance-test.svariance) > tolerance {
t.Errorf("incorrect variance: %f != %f", variance, test.svariance)
}
if stdev, _ := ss.StdDev(); math.Abs(stdev-test.sstdev) > tolerance {
t.Errorf("incorrect stdev: %f != %f", stdev, test.sstdev)
}
}
}
func TestEmpty(t *testing.T) {
os := NewOnlineStat()
_, err := os.Mean()
ps := NewSampleStat()
_, err := ps.Mean()
if err == nil {
t.Errorf("failure to notify the running stat was empty")
}
_, err = os.Variance()
_, err = ps.Variance()
if err == nil {
t.Errorf("failure to notify the running stat was empty")
}
_, err = os.StdDev()
_, err = ps.StdDev()
if err == nil {
t.Errorf("failure to notify the running stat was empty")
}