1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
package dstream
import (
"fmt"
"sort"
)
// argsort pairs a slice of keys with a parallel slice of positions.
// It implements sort.Interface, ordering by key; every swap applied
// to the keys is mirrored on the positions, so after sorting inds
// records the permutation that was applied.
type argsort struct {
	// s holds the keys that determine the ordering.
	s []uint64

	// inds is rearranged in lockstep with s.
	inds []int
}

// Len returns the number of keys.
func (as argsort) Len() int {
	n := len(as.s)
	return n
}

// Less reports whether the key at position i is strictly smaller
// than the key at position j.
func (as argsort) Less(i, j int) bool {
	left, right := as.s[i], as.s[j]
	return left < right
}

// Swap exchanges positions i and j in both the keys and the
// positions slice.
func (as argsort) Swap(i, j int) {
	keys, idx := as.s, as.inds
	keys[i], keys[j] = keys[j], keys[i]
	idx[i], idx[j] = idx[j], idx[i]
}
// Regroup returns a Dstream with the same rows as ds, but with chunk
// boundaries determined by the variable named groupvar: each distinct
// value of that variable gets its own chunk holding every row of ds
// that carries the value. The grouping variable must have uint64
// type.
//
// When sortchunks is true the chunks are additionally arranged in
// ascending order of their id value.
//
// Regroup panics if ds has no variable named groupvar.
func Regroup(ds Dstream, groupvar string, sortchunks bool) Dstream {

	// Locate the grouping variable among the stream's variables.
	pos := -1
	names := ds.Names()
	for k := 0; k < len(names); k++ {
		if names[k] == groupvar {
			pos = k
			break
		}
	}
	if pos < 0 {
		panic(fmt.Sprintf("Regroup: variable '%s' not found", groupvar))
	}

	rg := doRegroup(ds, pos)
	if !sortchunks {
		return rg
	}

	// Collect one id per chunk; the first element of a chunk's id
	// slice is taken as that chunk's id.
	var ids []uint64
	for _, chunk := range rg.data[pos] {
		ids = append(ids, chunk.([]uint64)[0])
	}

	// Sort the ids while tracking the permutation, so that perm[k]
	// is the original position of the chunk that belongs in slot k.
	perm := make([]int, len(ids))
	for k := range perm {
		perm[k] = k
	}
	sort.Sort(argsort{s: ids, inds: perm})

	// Apply the same permutation to the chunks of every variable.
	nchunk := len(rg.data[0])
	sorted := make([][]interface{}, len(rg.data))
	for j := 0; j < ds.NumVar(); j++ {
		sorted[j] = make([]interface{}, nchunk)
		for dst, src := range perm {
			sorted[j][dst] = rg.data[j][src]
		}
	}
	rg.data = sorted

	return rg
}
|