Add distinction between population and sample statistics

2013-12-29 00:48:20 -08:00 · 2013-12-29 00:48:20 -08:00 · d2fef9128a
commit d2fef9128a
parent e7e174530e
2 changed files with 124 additions and 24 deletions
--- a/ostat.go
+++ b/ostat.go
@@ -13,12 +13,22 @@ type OnlineStat struct {
 	m2   float64
 	Max  float64
 	Min  float64
+	typ  int64
 }

-func NewOnlineStat() *OnlineStat {
+func NewSampleStat() *OnlineStat {
 	return &OnlineStat{
 		Min: math.Inf(1),
 		Max: math.Inf(-1),
+		typ: 1,
+	}
+}
+
+func NewPopulationStat() *OnlineStat {
+	return &OnlineStat{
+		Min: math.Inf(1),
+		Max: math.Inf(-1),
+		typ: 0,
 	}
 }

@@ -47,7 +57,7 @@ func (os *OnlineStat) Variance() (float64, error) {
 	if os.n == 0 {
 		return 0.0, errors.New("no data")
 	}
-	return os.m2 / float64(os.n-1), nil
+	return os.m2 / float64(os.n-os.typ), nil
 }

 func (os *OnlineStat) StdDev() (float64, error) {
--- a/ostat_test.go
+++ b/ostat_test.go
@@ -5,40 +5,130 @@ import (
 	"testing"
 )

+const tolerance = 1e-7
+
 func TestInsert(t *testing.T) {
-	os := NewOnlineStat()
-	v := []float64{4, 7, 13, 16}
-	for _, i := range v {
-		os.Push(i)
+	tests := []struct {
+		samples              []float64
+		min                  float64
+		max                  float64
+		mean                 float64
+		pvariance, svariance float64
+		pstdev, sstdev       float64
+	}{
+		{
+			min: math.Inf(1),
+			max: math.Inf(-1),
+		},
+		{
+			samples:   []float64{4, 7, 13, 16},
+			min:       4,
+			max:       16,
+			mean:      10.0,
+			pvariance: 22.5,
+			pstdev:    math.Sqrt(22.5),
+			svariance: 30,
+			sstdev:    math.Sqrt(30.0),
+		},
+		{
+			samples:   []float64{10e8 + 4, 10e8 + 7, 10e8 + 13, 10e8 + 16},
+			min:       10e8 + 4,
+			max:       10e8 + 16,
+			mean:      10e8 + 10.0,
+			pvariance: 22.5,
+			pstdev:    math.Sqrt(22.5),
+			svariance: 30,
+			sstdev:    math.Sqrt(30.0),
+		},
+		{
+			samples:   []float64{10e9 + 4, 10e9 + 7, 10e9 + 13, 10e9 + 16},
+			min:       10e9 + 4,
+			max:       10e9 + 16,
+			mean:      10e9 + 10.0,
+			pvariance: 22.5,
+			pstdev:    math.Sqrt(22.5),
+			svariance: 30,
+			sstdev:    math.Sqrt(30.0),
+		},
+		{
+
+			samples:   []float64{119., 480., 900., -561., 664., -652., 549., -342., -754., 983., -485., 6., 572., 683., -111., 400., -179., 60., 142., 253., -330., -886., -120., 590., 465., -374., 299., -32., -794., -107., -531., -649., -877., 114., 179., 704., 508., -210., -128., 147., 654., -251., -337., 643., 865., 530., 535., 534., 528., -115., -645., 55., -584., 104., -556., 496., -863., 483., 145., 578., 318., -611., 290., 178., -25., -792., -45., 221., -172., 491., 911., 904., 523., 778., -484., 230., -897., -97., 316., -255., -749., -737., 709., -74., 48., 839., 428., -560., -613., -639., 371., 948., -966., -802., -618., -753., 835., -372., -492., 89.},
+			min:       -966.0,
+			max:       983.0,
+			mean:      21.68,
+			pvariance: 293082.29760000011,
+			pstdev:    541.3707579838424,
+			svariance: 296042.72484848,
+			sstdev:    544.0980838493,
+		},
+		{
+			samples:   []float64{36.22727273, -8., -0.79775281, -1.14772727, 29.03333333, -19.53571429, 0.49090909, -0.41666667, -25.92, -10.77464789, 67.71428571, -39.28571429, 10.05154639, -46., 7.91891892, -9.92, -10.30769231, -11.20634921, 13.85, -9.19565217, -16.9, -2.725, 14.32142857, -18.64285714, 9.70238095, 5.92307692, 15.15789474, 8.22368421, 3.56179775, 5.87368421, -16.68, -104.57142857, 6.42352941, -5.14893617, -4.925, -13.31818182, 7.81538462, -5.01492537, -15.35483871, 7.08421053, -4.47777778, 7.97727273, 10.09574468, 10.56521739, 7.67777778, -57.5, 9.03773585, -2.0989011, 12.34482759, -85.5, -107., -4.89473684, 0.90217391, 13.2, 7.31111111, -1.98611111, -7.3375, 9.80722892, -5.88043478, -404., 47.33333333, 16.85416667, -3.06410256, -72.85714286, 29.08333333, 13.58333333, -11.84615385, -14.36363636, 10.25373134, 3.92207792, 11.14634146, -16.15151515, -10.16363636, 13.13513514, -4.55223881, 14.45, 44.88888889, 5.36781609, 1.29591837, 176.5, 18.95652174, 1.85416667, 8.4125, -168.33333333, 2.97590361, 5.45555556, 5.20930233, -7.11764706, 26.53846154, -16.94285714, 17.18181818, 32.86666667, -22.375, 10.14285714, -51.625, -20.72916667, 0.4516129, -5.47058824, -17.81818182, 9.},
+			min:       -404.0,
+			max:       176.5,
+			mean:      -6.5472287624739636,
+			pvariance: 2864.1893930503825,
+			pstdev:    53.518122099438266,
+			svariance: 2893.1205990296,
+			sstdev:    53.787736511491,
+		},
 	}
-	if os.Min != 4 {
-		t.Errorf("incorrectly calculated min")
-	}
-	if os.Max != 16 {
-		t.Errorf("incorrectly calculated max")
-	}
-	if m, _ := os.Mean(); m != 10.0 {
-		t.Errorf("incorrect mean")
-	}
-	if variance, _ := os.Variance(); variance != 30.0 {
-		t.Errorf("incorrect variance: %f", variance)
-	}
-	if stdev, _ := os.StdDev(); stdev != math.Sqrt(30.0) {
-		t.Errorf("incorrect stdev: %f", stdev)
+
+	for _, test := range tests {
+		ps := NewPopulationStat()
+		ss := NewSampleStat()
+		for _, i := range test.samples {
+			ps.Push(i)
+			ss.Push(i)
+		}
+		if ps.Min != ss.Min {
+			t.Errorf("Mins don't match")
+		}
+		if ps.Max != ss.Max {
+			t.Errorf("Maxs don't match")
+		}
+		if ps.Min != test.min {
+			t.Errorf("incorrectly calculated min: %f != %f", ps.Min, test.min)
+		}
+		if ps.Max != test.max {
+			t.Errorf("incorrectly calculated max: %f != %f", ps.Max, test.max)
+		}
+
+		pmean, _ := ps.Mean()
+		smean, _ := ss.Mean()
+		if pmean != smean {
+			t.Errorf("Means don't match")
+		}
+		if m, _ := ps.Mean(); math.Abs(m-test.mean) > tolerance {
+			t.Errorf("incorrect mean: %f != %f", m, test.mean)
+		}
+
+		if variance, _ := ps.Variance(); math.Abs(variance-test.pvariance) > tolerance {
+			t.Errorf("incorrect variance: %f != %f", variance, test.pvariance)
+		}
+		if stdev, _ := ps.StdDev(); math.Abs(stdev-test.pstdev) > tolerance {
+			t.Errorf("incorrect stdev: %f != %f", stdev, test.pstdev)
+		}
+
+		if variance, _ := ss.Variance(); math.Abs(variance-test.svariance) > tolerance {
+			t.Errorf("incorrect variance: %f != %f", variance, test.svariance)
+		}
+		if stdev, _ := ss.StdDev(); math.Abs(stdev-test.sstdev) > tolerance {
+			t.Errorf("incorrect stdev: %f != %f", stdev, test.sstdev)
+		}
 	}
 }

 func TestEmpty(t *testing.T) {
-	os := NewOnlineStat()
-	_, err := os.Mean()
+	ps := NewSampleStat()
+	_, err := ps.Mean()
 	if err == nil {
 		t.Errorf("failure to notify the running stat was empty")
 	}
-	_, err = os.Variance()
+	_, err = ps.Variance()
 	if err == nil {
 		t.Errorf("failure to notify the running stat was empty")
 	}
-	_, err = os.StdDev()
+	_, err = ps.StdDev()
 	if err == nil {
 		t.Errorf("failure to notify the running stat was empty")
 	}