File: describe.go

package info (click to toggle)
golang-github-kshedden-dstream 0.0~git20190512.c4c4106-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 596 kB
  • sloc: makefile: 30
file content (114 lines) | stat: -rw-r--r-- 1,815 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package dstream

import (
	"math"
)

// Stats holds the summary statistics reported for a single float64
// variable of a Dstream.
type Stats struct {
	// Mean is the arithmetic mean of the values.
	Mean float64

	// Min is the smallest value.
	Min float64

	// Max is the largest value.
	Max float64

	// SD is the standard deviation of the values.
	SD float64

	// N is the number of values that are neither NaN nor infinite.
	N int

	// NaN is the number of NaN values.
	NaN int

	// Inf is the number of infinite values.
	Inf int
}

// Describe computes summary statistics for the float64 columns of a dstream.
//
// The stream is reset and read twice: the first pass counts the values
// and accumulates the sum, minimum and maximum; the second pass
// accumulates squared deviations from the mean.
//
// NaN and infinite values are tallied in Stats.NaN and Stats.Inf and
// are excluded from N, Mean, Min, Max and SD.  SD is the population
// standard deviation (the sum of squares is divided by N, not N-1).
//
// The returned map has one entry per variable, keyed by variable name.
// A variable whose data is not []float64, or that has no finite
// values, has N equal to zero and Mean and SD equal to NaN (0/0).
func Describe(data Dstream) map[string]Stats {
	data.Reset()

	p := data.NumVar()
	stats := make([]Stats, p)

	// First pass: counts, sum, min and max.
	for data.Next() {
		for j := 0; j < p; j++ {
			x, ok := data.GetPos(j).([]float64)
			if !ok {
				// Only float64 columns are summarized.
				continue
			}

			st := &stats[j]
			for _, y := range x {
				switch {
				case math.IsNaN(y):
					st.NaN++
					continue
				case math.IsInf(y, 0):
					st.Inf++
					continue
				}

				st.N++
				st.Mean += y

				// Seed Min and Max from the first finite value of
				// the column.  Keying on N rather than on the
				// position within the stream keeps the extremes
				// correct when the column starts with NaN/Inf
				// values or with an empty chunk.
				if st.N == 1 || y < st.Min {
					st.Min = y
				}
				if st.N == 1 || y > st.Max {
					st.Max = y
				}
			}
		}
	}

	// Convert sum to mean.  When N is zero this yields NaN.
	for j := range stats {
		stats[j].Mean /= float64(stats[j].N)
	}

	// Second pass: sum of squared deviations from the mean.
	data.Reset()
	for data.Next() {
		for j := 0; j < p; j++ {
			x, ok := data.GetPos(j).([]float64)
			if !ok {
				continue
			}

			mean := stats[j].Mean
			for _, y := range x {
				// Skip the same values that were excluded from
				// the mean, otherwise a single NaN or Inf would
				// make the SD NaN or Inf.
				if math.IsNaN(y) || math.IsInf(y, 0) {
					continue
				}
				d := y - mean
				stats[j].SD += d * d
			}
		}
	}

	// Convert sum of squares to SD.
	for j := range stats {
		stats[j].SD = math.Sqrt(stats[j].SD / float64(stats[j].N))
	}

	// Put the statistics into a map indexed by variable names.
	names := data.Names()
	stm := make(map[string]Stats, p)
	for j := 0; j < p; j++ {
		stm[names[j]] = stats[j]
	}

	return stm
}