File: diffchunk.go

package info (click to toggle)
golang-github-kshedden-dstream 0.0~git20190512.c4c4106-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 596 kB
  • sloc: makefile: 30
file content (137 lines) | stat: -rw-r--r-- 2,415 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package dstream

import (
	"fmt"
)

// diffChunk is the Dstream returned by DiffChunk.  It passes the
// variables of its source through and adds differenced companions for
// the variables selected in order.
type diffChunk struct {
	xform

	order     map[string]int // source variable name -> differencing order; a companion is added only when the order is positive
	maxorder  int            // largest value found in order, or 0 if none is positive; set by init
	nobs      int            // total sample size, accumulated by Next
	nobsKnown bool           // indicates whether the sample size is available; set when the source is exhausted
	doneInit  bool           // init has run
}

// DiffChunk wraps data in a Dstream that carries every source variable
// and, for each variable whose entry in order is positive, a companion
// variable holding its difference of that order.  The companion is
// named by appending "$d" and the order to the source name (for
// example "x$d2").  Differences are computed within a chunk only,
// never across chunk boundaries.  The leading values of each chunk,
// as many as the largest order requested, are dropped from every
// variable so that all columns of a chunk keep the same length.  The
// order map is retained, not copied.
func DiffChunk(data Dstream, order map[string]int) Dstream {
	df := new(diffChunk)
	df.source = data
	df.order = order
	df.init()
	return df
}

// init records the largest requested differencing order, builds the
// list of output variable names, and allocates one output slot per
// name.  Each source name is followed immediately by its differenced
// companion ("<name>$d<order>") when its order is positive.
func (df *diffChunk) init() {
	// The largest order determines how many leading values are
	// dropped from every chunk.
	df.maxorder = 0
	for _, ord := range df.order {
		if ord > df.maxorder {
			df.maxorder = ord
		}
	}

	// Output names, in source order with companions interleaved.
	var outNames []string
	for _, src := range df.source.Names() {
		outNames = append(outNames, src)
		if ord := df.order[src]; ord > 0 {
			outNames = append(outNames, fmt.Sprintf("%s$d%d", src, ord))
		}
	}

	df.names = outNames
	df.bdata = make([]interface{}, len(outNames))
	df.doneInit = true
}

// Nobs returns the accumulated sample size once the source has been
// read to its end, and -1 while the sample size is not yet known.
func (df *diffChunk) Nobs() int {
	if !df.nobsKnown {
		return -1
	}
	return df.nobs
}

// Next advances to the next usable chunk of the source and fills the
// output columns from it.  It returns false once the source is
// exhausted, at which point the sample size reported by Nobs becomes
// available.
//
// A chunk is usable only when every source column holds more than
// maxorder values.  A chunk that is too short is skipped as a whole
// and the following source chunk is tried, so a true return always
// means the output columns were refreshed from a single source chunk.
//
// For a usable chunk, each source column is passed through with its
// first maxorder values dropped.  Each []float64 column with a
// positive order is followed by its differenced companion, trimmed to
// the same length.  The sample size grows by the number of retained
// rows, counted once per chunk.
//
// Next panics if a source column is neither []float64 nor []string.
func (df *diffChunk) Next() bool {

	if !df.doneInit {
		df.init()
	}

	maxorder := df.maxorder

chunks:
	for {
		if !df.source.Next() {
			df.nobsKnown = true
			return false
		}

		if df.bdata == nil {
			df.bdata = make([]interface{}, len(df.names))
		}

		// Loop over the original data columns
		jj := 0    // position of the next output column
		nkeep := 0 // rows retained from this chunk
		for j, oname := range df.source.Names() {

			v := df.source.GetPos(j)
			n := ilen(v)
			if n <= maxorder {
				// Chunk is too short to use.  Skip the whole chunk so
				// that output positions stay aligned with the names
				// and no data from an earlier chunk is reported again.
				continue chunks
			}
			if j == 0 {
				// Count rows once per chunk, not once per column.
				nkeep = n - maxorder
			}

			q := df.order[oname]
			switch v := v.(type) {
			case []float64:
				df.bdata[jj] = v[maxorder:]
				jj++
				if q > 0 {
					// Reuse the previous buffer for this slot if
					// there is one.
					var y []float64
					if df.bdata[jj] != nil {
						y = df.bdata[jj].([]float64)
					}
					y = resizeFloat64(y, len(v))
					copy(y, v)
					y = diff(y, q)
					if q < maxorder {
						// Trim so the companion has the same length
						// as the passed-through columns.
						y = y[maxorder-q:]
					}
					df.bdata[jj] = y
					jj++
				}
			case []string:
				df.bdata[jj] = v[maxorder:]
				jj++
				if q > 0 {
					// init reserved a companion name for this
					// variable, but strings cannot be differenced.
					// Leave that slot nil and step past it so later
					// columns stay aligned with their names.
					jj++
				}
			default:
				msg := fmt.Sprintf("unknown data type: %T", v)
				panic(msg)
			}
		}

		df.nobs += nkeep
		return true
	}
}

// diff1 computes the first-order difference of x in place.  Each
// element x[i] with i >= 1 is replaced by x[i] - x[i-1], and the
// returned slice is x[1:], which has one element fewer than x and
// shares its backing array.  A slice with no elements is returned
// unchanged, since it has no leading element to drop.
func diff1(x []float64) []float64 {
	if len(x) == 0 {
		return x
	}

	// Work backward so that x[i-1] still holds its original value
	// when x[i] is updated.
	for i := len(x) - 1; i > 0; i-- {
		x[i] -= x[i-1]
	}
	return x[1:]
}

// diff returns the difference of x of order ord, obtained by applying
// diff1 ord times.  Each pass overwrites x in place and drops the
// leading element, so when ord does not exceed len(x) the result has
// len(x)-ord elements and shares storage with x.  If ord is not
// positive, x is returned unchanged.
func diff(x []float64, ord int) []float64 {
	for pass := ord; pass > 0; pass-- {
		x = diff1(x)
	}
	return x
}