File: index.go

package info (click to toggle)
golang-github-biogo-hts 1.4.5%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,772 kB
  • sloc: makefile: 3
file content (124 lines) | stat: -rw-r--r-- 3,150 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright ©2015 The bíogo Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package index provides common code for CSI and tabix BGZF indexing.
package index

import (
	"errors"
	"io"

	"github.com/biogo/hts/bgzf"
)

var (
	// ErrNoReference is the error reported when an index operation
	// refers to a reference that is not present.
	ErrNoReference = errors.New("index: no reference")
	// ErrInvalid is the error reported for an invalid interval.
	ErrInvalid = errors.New("index: invalid interval")
)

// ReferenceStats holds mapping statistics for a genomic reference.
type ReferenceStats struct {
	// Chunk is the span of the indexed BGZF
	// holding alignments to the reference.
	Chunk bgzf.Chunk

	// Mapped is the count of mapped reads.
	Mapped uint64

	// Unmapped is the count of unmapped reads.
	Unmapped uint64
}

// ChunkReader wraps a bgzf.Reader to provide a mechanism to read a selection of
// BGZF chunks.
type ChunkReader struct {
	// r is the underlying BGZF reader; it is placed into Blocked
	// mode by NewChunkReader and restored by Close.
	r *bgzf.Reader

	// wasBlocked records r's Blocked mode at construction time so
	// that Close can restore it.
	wasBlocked bool

	// chunks is the remaining set of chunks to read; chunks[0] is
	// the chunk currently being read.
	chunks []bgzf.Chunk
}

// NewChunkReader returns a ChunkReader to read from r, limiting the reads to
// the provided chunks. The provided bgzf.Reader will be put into Blocked mode.
// If seeking to the first chunk fails, r's original blocking mode is restored
// before the error is returned.
func NewChunkReader(r *bgzf.Reader, chunks []bgzf.Chunk) (*ChunkReader, error) {
	b := r.Blocked
	r.Blocked = true
	if len(chunks) != 0 {
		err := r.Seek(chunks[0].Begin)
		if err != nil {
			// Do not leave the caller's reader in an altered
			// state when construction fails.
			r.Blocked = b
			return nil, err
		}
	}
	return &ChunkReader{r: r, wasBlocked: b, chunks: chunks}, nil
}

// Read satisfies the io.Reader interface.
//
// Read returns data drawn only from within the chunks provided to
// NewChunkReader, advancing to the next chunk when the current one is
// exhausted and returning io.EOF once all chunks have been consumed.
func (r *ChunkReader) Read(p []byte) (int, error) {
	if len(r.chunks) == 0 {
		return 0, io.EOF
	}
	last := r.r.LastChunk()
	if vOffset(last.End) >= vOffset(r.chunks[0].End) {
		// The reader is already at or past the virtual-offset end
		// of the current chunk.
		return 0, io.EOF
	}

	// Ensure the byte slice does not extend beyond the end of
	// the current chunk. We do not need to consider reading
	// beyond the end of the block because the bgzf.Reader is in
	// blocked mode and so will stop there anyway.
	want := int(r.chunks[0].End.Block)
	if r.chunks[0].End.Block == 0 && r.chunks[0].End.File > last.End.File {
		// Special case for when the current end block offset
		// is zero: the chunk end lies at the start of a later
		// block, so the read may run to the end of this block.
		want = r.r.BlockLen()
	}
	var cursor int
	if last.End.File == r.chunks[0].End.File {
		// Our end is in the same block as the last chunk end
		// so set the cursor to the chunk block end to prevent
		// reading past the end of the chunk.
		cursor = int(last.End.Block)
	}
	n, err := r.r.Read(p[:min(len(p), want-cursor)])
	if err != nil {
		if n != 0 && err == io.EOF {
			// Data was returned with EOF; defer reporting the
			// EOF until the next call, per io.Reader convention.
			err = nil
		}
		return n, err
	}

	// Check whether we are at or past the end of the current
	// chunk or we have not made progress for reasons other than
	// zero length p.
	this := r.r.LastChunk()
	if (len(p) != 0 && this == last) || vOffset(this.End) >= vOffset(r.chunks[0].End) {
		// Advance to the next chunk; if none remain, report EOF
		// along with any data obtained by this call.
		r.chunks = r.chunks[1:]
		if len(r.chunks) == 0 {
			return n, io.EOF
		}
		err = r.r.Seek(r.chunks[0].Begin)
	}

	return n, err
}

// vOffset returns the BGZF virtual offset for o: the compressed file
// offset of the block start in the upper bits and the within-block
// offset in the low 16 bits.
func vOffset(o bgzf.Offset) int64 {
	const blockBits = 16
	file := o.File << blockBits
	return file | int64(o.Block)
}

// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}

// Close returns the bgzf.Reader to its original blocking mode and releases it.
// The bgzf.Reader is not closed.
func (r *ChunkReader) Close() error {
	// Capture the underlying reader before releasing our reference,
	// then restore its pre-construction blocking mode.
	br := r.r
	r.r = nil
	br.Blocked = r.wasBlocked
	return nil
}