File: merger_test.go

package info (click to toggle)
golang-github-biogo-hts 1.4.4%2Bdfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,740 kB
  • sloc: makefile: 3
file content (175 lines) | stat: -rw-r--r-- 3,832 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
// Copyright ©2017 The bíogo Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package bam

import (
	"bytes"
	"fmt"
	"io"
	"sort"
	"testing"

	"github.com/biogo/hts/sam"
)

type byFunc struct {
	less func(a, b *sam.Record) bool
	recs []*sam.Record
}

func (r byFunc) Len() int           { return len(r.recs) }
func (r byFunc) Less(i, j int) bool { return r.less(r.recs[i], r.recs[j]) }
func (r byFunc) Swap(i, j int)      { r.recs[i], r.recs[j] = r.recs[j], r.recs[i] }

func sortBAM(r io.Reader, so sam.SortOrder, less func(a, b *sam.Record) bool, fn func(*sam.Record), shard int) error {
	br, err := NewReader(r, 0)
	if err != nil {
		return fmt.Errorf("failed to open bam reader: %v", err)
	}
	defer br.Close()

	h := br.Header().Clone()
	h.SortOrder = so

	recs := make([]*sam.Record, 0, shard)
	var t []*Reader
	it := sam.NewIterator(br)
	for {
		var n int
		for it.Next() {
			recs = append(recs, it.Record())
			if len(recs) == cap(recs) {
				r, err := writeSorted(h, recs, less)
				if err != nil {
					return err
				}
				defer r.Close()
				t = append(t, r)
				n, recs = len(recs), recs[:0]
			}
		}
		if len(recs) != 0 {
			r, err := writeSorted(h, recs, less)
			if err != nil {
				return err
			}
			defer r.Close()
			t = append(t, r)
			break
		}
		err = it.Error()
		if n == 0 || err != nil {
			break
		}
	}
	if err != nil {
		return fmt.Errorf("error during bam reading: %v", err)
	}

	m, err := NewMerger(less, t...)
	if err != nil {
		return fmt.Errorf("failed to create merger: %v", err)
	}
	sorted := sam.NewIterator(m)
	for sorted.Next() {
		fn(sorted.Record())
	}
	err = sorted.Error()
	if err != nil {
		return fmt.Errorf("error during bam reading: %v", err)
	}

	return nil
}

func writeSorted(h *sam.Header, recs []*sam.Record, less func(a, b *sam.Record) bool) (*Reader, error) {
	if less != nil {
		sort.Sort(byFunc{less, recs})
	}

	var buf bytes.Buffer

	bw, err := NewWriter(&buf, h, 0)
	if err != nil {
		return nil, fmt.Errorf("failed to open bam writer: %v", err)
	}
	for _, r := range recs {
		err = bw.Write(r)
		if err != nil {
			return nil, fmt.Errorf("failed to write record: %v", err)
		}
	}
	err = bw.Close()
	if err != nil {
		return nil, fmt.Errorf("failed to close bam writer: %v", err)
	}

	r, err := NewReader(&buf, 0)
	if err != nil {
		return nil, fmt.Errorf("failed to open bam writer: %v", err)
	}
	return r, err
}

var mergerTests = []struct {
	r func() io.Reader

	so     sam.SortOrder
	less   func(a, b *sam.Record) bool
	expect func(a, b *sam.Record) bool
	shard  int
}{
	{
		r:      func() io.Reader { return bytes.NewReader(bamHG00096_1000) },
		so:     sam.QueryName,
		less:   nil,
		expect: (*sam.Record).LessByName,
		shard:  199,
	},
	{
		r:      func() io.Reader { return bytes.NewReader(bamHG00096_1000) },
		so:     sam.QueryName,
		less:   nil,
		expect: (*sam.Record).LessByName,
		shard:  1e5,
	},
	{
		r:      func() io.Reader { return bytes.NewReader(bamHG00096_1000) },
		so:     sam.Unsorted,
		less:   nil,
		expect: (*sam.Record).LessByCoordinate, // HG00096 is sorted by coordinate.
		shard:  199,
	},
}

func TestMerger(t *testing.T) {
	for _, test := range mergerTests {
		var recs []*sam.Record
		fn := func(r *sam.Record) {
			recs = append(recs, r)
		}

		var less func(a, b *sam.Record) bool
		switch test.so {
		case sam.UnknownOrder:
			less = test.less
		case sam.Unsorted:
		case sam.QueryName:
			less = (*sam.Record).LessByName
		case sam.Coordinate:
			less = (*sam.Record).LessByCoordinate
		}
		err := sortBAM(test.r(), test.so, less, fn, test.shard)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if less == nil {
			continue
		}
		if !sort.IsSorted(byFunc{test.expect, recs}) {
			t.Error("not in expected sort order")
		}
	}
}