1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
// Copyright ©2015 The bíogo Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package index provides common code for CSI and tabix BGZF indexing.
package index
import (
"errors"
"io"
"github.com/biogo/hts/bgzf"
)
// Sentinel errors exported by the index package. Neither value is returned
// by the code visible in this file; presumably they are returned by the
// index implementations that build on this package (confirm against callers).
var (
// ErrNoReference is the error value with the message
// "index: no reference".
ErrNoReference = errors.New("index: no reference")
// ErrInvalid is the error value with the message
// "index: invalid interval".
ErrInvalid = errors.New("index: invalid interval")
)
// ReferenceStats holds mapping statistics for a genomic reference: the
// BGZF chunk spanned by its alignments and the counts of mapped and
// unmapped reads.
type ReferenceStats struct {
// Chunk is the span of the indexed BGZF
// holding alignments to the reference.
Chunk bgzf.Chunk
// Mapped is the count of mapped reads.
Mapped uint64
// Unmapped is the count of unmapped reads.
Unmapped uint64
}
// ChunkReader wraps a bgzf.Reader to provide a mechanism to read a selection
// of BGZF chunks. Use NewChunkReader to construct one.
type ChunkReader struct {
// r is the wrapped reader. Close sets it to nil.
r *bgzf.Reader
// wasBlocked records r.Blocked as it was when the ChunkReader was
// created, so that Close can restore it.
wasBlocked bool
// chunks holds the chunks still to be read; chunks[0] is the chunk
// currently being read and is dropped once it has been consumed.
chunks []bgzf.Chunk
}
// NewChunkReader returns a ChunkReader to read from r, limiting the reads to
// the provided chunks. The provided bgzf.Reader will be put into Blocked mode;
// its previous mode is remembered and restored by Close.
//
// If chunks is non-empty, r is positioned at the beginning of the first
// chunk. If that seek fails, the Blocked mode of r is restored to the value
// it had on entry and the seek error is returned with a nil ChunkReader.
func NewChunkReader(r *bgzf.Reader, chunks []bgzf.Chunk) (*ChunkReader, error) {
	wasBlocked := r.Blocked
	r.Blocked = true
	if len(chunks) != 0 {
		if err := r.Seek(chunks[0].Begin); err != nil {
			// No ChunkReader is handed back on failure, so nothing
			// could call Close to undo the mode change. Undo it here
			// so the caller's reader keeps the mode it came in with.
			r.Blocked = wasBlocked
			return nil, err
		}
	}
	return &ChunkReader{r: r, wasBlocked: wasBlocked, chunks: chunks}, nil
}
// Read satisfies the io.Reader interface. It reads from the wrapped
// bgzf.Reader, capping each read so that it does not extend beyond the end
// of the current chunk (r.chunks[0]). When the current chunk is finished it
// is dropped and the reader is sought to the Begin of the next chunk.
// io.EOF is returned when no chunks remain, or when the wrapped reader's
// last chunk end is already at or past the end of the current chunk.
func (r *ChunkReader) Read(p []byte) (int, error) {
if len(r.chunks) == 0 {
return 0, io.EOF
}
// last is the wrapped reader's LastChunk before this read; its End is
// treated here as the current read position (presumably the end of the
// most recent read or seek; confirm against the bgzf documentation).
last := r.r.LastChunk()
if vOffset(last.End) >= vOffset(r.chunks[0].End) {
return 0, io.EOF
}
// Ensure the byte slice does not extend beyond the end of
// the current chunk. We do not need to consider reading
// beyond the end of the block because the bgzf.Reader is in
// blocked mode and so will stop there anyway.
want := int(r.chunks[0].End.Block)
if r.chunks[0].End.Block == 0 && r.chunks[0].End.File > last.End.File {
// Special case for when the current end block offset
// is zero.
want = r.r.BlockLen()
}
var cursor int
if last.End.File == r.chunks[0].End.File {
// Our end is in the same block as the last chunk end
// so set the cursor to the chunk block end to prevent
// reading past the end of the chunk.
cursor = int(last.End.Block)
}
// Read at most want-cursor bytes, or len(p) if that is smaller.
n, err := r.r.Read(p[:min(len(p), want-cursor)])
if err != nil {
// An io.EOF that arrives together with data is suppressed so the
// caller receives the bytes with a nil error. Any error returns
// here, before the chunk bookkeeping below.
if n != 0 && err == io.EOF {
err = nil
}
return n, err
}
// Check whether we are at or past the end of the current
// chunk or we have not made progress for reasons other than
// zero length p.
this := r.r.LastChunk()
if (len(p) != 0 && this == last) || vOffset(this.End) >= vOffset(r.chunks[0].End) {
// Drop the finished chunk. If it was the final one, report io.EOF
// along with any bytes read; otherwise move to the start of the
// next chunk and return the result of that seek as the error.
r.chunks = r.chunks[1:]
if len(r.chunks) == 0 {
return n, io.EOF
}
err = r.r.Seek(r.chunks[0].Begin)
}
return n, err
}
// vOffset packs o into a single integer: the File offset shifted left by
// 16 bits, OR-ed with the Block offset. The packed values are compared to
// order offsets.
func vOffset(o bgzf.Offset) int64 {
	file := o.File << 16
	block := int64(o.Block)
	return file | block
}
// min returns the smaller of a and b. When they are equal, b is returned.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// Close returns the bgzf.Reader to its original blocking mode and releases it.
// The bgzf.Reader is not closed. Close always returns nil. Calling Close on
// a ChunkReader that holds no bgzf.Reader (one that has already been closed,
// or a zero value) is a no-op.
func (r *ChunkReader) Close() error {
	if r.r == nil {
		// Nothing to restore: the reader has already been released.
		return nil
	}
	r.r.Blocked = r.wasBlocked
	r.r = nil
	return nil
}
|