A Go package that implements efficient, accurate, and stable calculation of online statistical quantities.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

166 lines
4.0KB

  1. // Package ostat is a go package that implements the efficient, accurate, and
  2. // stable calculation of online statistical quantities. The algorithm comes
  3. // from *The Art of Computer Programming*, vol. 2, by Knuth.
  4. package ostat
  5. import (
  6. "encoding/json"
  7. "fmt"
  8. "math"
  9. )
  // The update scheme used by this package follows
// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
//
// Population and Sample select which kind of statistic an OnlineStat
// computes. They are passed at initialization; the chosen value is also the
// amount subtracted from the observation count when the variance is computed.
const (
	// Population selects population statistics; see
	// http://en.wikipedia.org/wiki/Statistical_population
	Population = 0
	// Sample selects sample statistics; see
	// http://en.wikipedia.org/wiki/Sample_(statistics)
	Sample = 1
)
  // OnlineStat accumulates running statistics over a stream of values without
// retaining the values themselves.
type OnlineStat struct {
	// n is the number of values pushed so far.
	n uint64
	// mean is the running mean; m2 is the running sum of squared deviations
	// from that mean.
	mean, m2 float64
	// Min and Max are the smallest and largest values seen so far.
	Min, Max float64
	// typ is either Population or Sample.
	typ uint64
}
  27. // NewSampleStat returns a ready-to-use OnlineStat for calculating sample
  28. // statistics.
  29. //
  30. // For the distinction between NewSampleStat and NewPopulationStat please refer to https://en.wikipedia.org/wiki/Statistical_population
  31. func NewSampleStat() *OnlineStat {
  32. return &OnlineStat{
  33. Min: math.Inf(1),
  34. Max: math.Inf(-1),
  35. typ: Sample,
  36. }
  37. }
  38. // NewPopulationStat returns a ready-to-use OnlineStat for calculating
  39. // population statistics.
  40. //
  41. // For the distinction between NewSampleStat and NewPopulationStat please refer to https://en.wikipedia.org/wiki/Statistical_population
  42. func NewPopulationStat() *OnlineStat {
  43. return &OnlineStat{
  44. Min: math.Inf(1),
  45. Max: math.Inf(-1),
  46. typ: Population,
  47. }
  48. }
  49. // MidStreamStat populates an OnlineStat such that it can pick up where
  50. // a previous one left off.
  51. //
  52. // Let's say you have already stored some values and want to start an
  53. // OnlineStat mid-stream; This is the function for you! Just provide it with
  54. // the data in the sinature, and you'll get a properly initialized OnlineStat.
  55. // N.b. the typ is either ostat.Population or ostat.Sample
  56. func MidStreamStat(n uint64, mean, stddev, min, max float64, typ uint64) *OnlineStat {
  57. return &OnlineStat{
  58. n: n,
  59. mean: mean,
  60. m2: stddev * stddev * float64(n),
  61. Min: min,
  62. Max: max,
  63. typ: typ,
  64. }
  65. }
  66. // Push is how you feed new values into an OnlineStat.
  67. func (os *OnlineStat) Push(v float64) {
  68. os.n++
  69. if v < os.Min {
  70. os.Min = v
  71. }
  72. if v > os.Max {
  73. os.Max = v
  74. }
  75. delta := v - os.mean
  76. os.mean = os.mean + delta/float64(os.n)
  77. os.m2 = os.m2 + delta*(v-os.mean)
  78. }
  79. // Mean as defined by http://en.wikipedia.org/wiki/Expected_value.
  80. func (os *OnlineStat) Mean() float64 {
  81. if os.n == 0 {
  82. return 0.0
  83. }
  84. return os.mean
  85. }
  86. // Variance as defined by http://en.wikipedia.org/wiki/Variance
  87. func (os *OnlineStat) Variance() float64 {
  88. if os.n == 0 {
  89. return 0.0
  90. }
  91. return os.m2 / float64(os.n-os.typ)
  92. }
  93. // StdDev is the standard deviation as defined by
  94. // http://en.wikipedia.org/wiki/Variance
  95. func (os *OnlineStat) StdDev() float64 {
  96. return math.Sqrt(os.Variance())
  97. }
  98. // CI returns a 95% confidence interval
  99. // https://en.wikipedia.org/wiki/Confidence_interval
  100. func (os *OnlineStat) CI() (float64, float64) {
  101. // 95% from http://mathworld.wolfram.com/ConfidenceInterval.html
  102. conf := 1.95996
  103. dev := os.StdDev() / math.Sqrt(float64(os.n))
  104. return os.mean - dev*conf, os.mean + dev*conf
  105. }
  106. // N returns how many values have been Pushed into an OnlineStat.
  107. func (os *OnlineStat) N() uint64 {
  108. return os.n
  109. }
  110. func (os *OnlineStat) String() string {
  111. return fmt.Sprintf(
  112. "%+v",
  113. struct {
  114. n uint64
  115. min float64
  116. max float64
  117. mean float64
  118. variance float64
  119. stdDev float64
  120. }{
  121. n: os.n,
  122. min: os.Min,
  123. max: os.Max,
  124. mean: os.Mean(),
  125. variance: os.Variance(),
  126. stdDev: os.StdDev(),
  127. },
  128. )
  129. }
  130. // MarshalJSON is implemented for convenient encoding to json.
  131. func (os *OnlineStat) MarshalJSON() ([]byte, error) {
  132. s := struct {
  133. N uint64 `json:"n"`
  134. Min float64 `json:"min"`
  135. Max float64 `json:"max"`
  136. Mean float64 `json:"mean"`
  137. Variance float64 `json:"variance"`
  138. StdDev float64 `json:"std_dev"`
  139. }{
  140. N: os.n,
  141. Min: os.Min,
  142. Max: os.Max,
  143. Mean: os.Mean(),
  144. Variance: os.Variance(),
  145. StdDev: os.StdDev(),
  146. }
  147. return json.Marshal(s)
  148. }