File: describe.go

package info (click to toggle)
golang-github-montanaflynn-stats 0.7.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 460 kB
  • sloc: makefile: 27
file content (81 lines) | stat: -rw-r--r-- 2,611 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package stats

import "fmt"

// Description holds the descriptive statistics computed for a dataset by
// Describe or DescribePercentileFunc.
//
// Count is the length of the input. Mean, Std, Max and Min hold the results of
// Mean, StandardDeviation, Max and Min on that input. DescriptionPercentiles
// holds one entry per recorded percentile, in the order the percentiles were
// requested. AllowedNaN records the allowNaN flag the description was built
// with.
type Description struct {
	Count                  int
	Mean                   float64
	Std                    float64
	Max                    float64
	Min                    float64
	DescriptionPercentiles []descriptionPercentile
	AllowedNaN             bool
}

// descriptionPercentile pairs a requested percentile with the value that was
// computed for it.
type descriptionPercentile struct {
	Percentile float64 // percentile that was requested, as handed to the percentile function
	Value      float64 // value the percentile function returned for Percentile
}

// Describe computes descriptive statistics for input, in the spirit of
// pandas' describe() in Python. It delegates to DescribePercentileFunc and
// uses Percentile to evaluate every requested percentile.
func Describe(input Float64Data, allowNaN bool, percentiles *[]float64) (*Description, error) {
	description, err := DescribePercentileFunc(input, allowNaN, percentiles, Percentile)
	return description, err
}

// DescribePercentileFunc generates descriptive statistics about a provided
// dataset, similar to python's pandas.describe(), using percentileFunc to
// compute each requested percentile.
//
// Count is always set to input.Len() and AllowedNaN to allowNaN. If the input
// is empty and allowNaN is false, the description filled in so far is returned
// together with ErrEmptyInput. Otherwise Std, Max, Min and Mean are taken from
// StandardDeviation, Max, Min and Mean, with their errors ignored.
//
// percentiles may be nil, in which case no percentiles are computed. A
// percentile whose computation returns an error is skipped unless allowNaN is
// true, in which case whatever value percentileFunc returned is recorded. A
// nil percentileFunc falls back to Percentile, the default used by Describe.
func DescribePercentileFunc(input Float64Data, allowNaN bool, percentiles *[]float64, percentileFunc func(Float64Data, float64) (float64, error)) (*Description, error) {
	// Calling a nil func value panics, so substitute the package default.
	if percentileFunc == nil {
		percentileFunc = Percentile
	}

	description := &Description{
		Count:      input.Len(),
		AllowedNaN: allowNaN,
	}

	if description.Count == 0 && !allowNaN {
		return description, ErrEmptyInput
	}

	// Errors are deliberately ignored here: the empty-input case with
	// allowNaN == false was rejected above, and when allowNaN is true the
	// caller has agreed to accept whatever these functions return on failure.
	description.Std, _ = StandardDeviation(input)
	description.Max, _ = Max(input)
	description.Min, _ = Min(input)
	description.Mean, _ = Mean(input)

	if percentiles == nil {
		return description, nil
	}

	for _, percentile := range *percentiles {
		value, err := percentileFunc(input, percentile)
		if err != nil && !allowNaN {
			// Failed percentiles are only recorded when NaN results are allowed.
			continue
		}
		description.DescriptionPercentiles = append(description.DescriptionPercentiles, descriptionPercentile{Percentile: percentile, Value: value})
	}

	return description, nil
}

// String renders the Description as tab-separated "label<TAB>value" lines,
// printing every floating-point value with the given number of decimals:
//
//	count   3
//	mean    2.00
//	std     0.82
//	max     3.00
//	min     1.00
//	25.00%  NaN
//	50.00%  1.50
//	75.00%  2.50
//	NaN OK  true
//
// Percentile labels are always printed with two decimals. A negative decimals
// is treated as 0; handed to fmt's "%.*f" unchanged it would put
// "%!(BADPREC)" into every numeric line. The result has no trailing newline.
func (d *Description) String(decimals int) string {
	if decimals < 0 {
		decimals = 0
	}

	out := fmt.Sprintf(
		"count\t%d\nmean\t%.*f\nstd\t%.*f\nmax\t%.*f\nmin\t%.*f\n",
		d.Count, decimals, d.Mean, decimals, d.Std, decimals, d.Max, decimals, d.Min,
	)
	for _, p := range d.DescriptionPercentiles {
		out += fmt.Sprintf("%.2f%%\t%.*f\n", p.Percentile, decimals, p.Value)
	}
	out += fmt.Sprintf("NaN OK\t%t", d.AllowedNaN)
	return out
}