1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
// Copyright ©2012 The bíogo Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sam_test
import (
"fmt"
"github.com/biogo/hts/sam"
)
func min(a, b int) int {
if a > b {
return b
}
return a
}
func max(a, b int) int {
if a < b {
return b
}
return a
}
// Overlap returns the length of the overlap between the alignment
// of the SAM record and the interval specified.
//
// Note that this will count repeated matches to the same reference
// location if CigarBack operations are used.
func Overlap(r *sam.Record, start, end int) int {
var overlap int
pos := r.Pos
for _, co := range r.Cigar {
t := co.Type()
con := t.Consumes()
lr := co.Len() * con.Reference
if con.Query == con.Reference {
o := min(pos+lr, end) - max(pos, start)
if o > 0 {
overlap += o
}
}
pos += lr
}
return overlap
}
func ExampleConsume() {
// Example alignments from the SAM specification:
//
// @HD VN:1.5 SO:coordinate
// @SQ SN:ref LN:45
// @CO --------------------------------------------------------
// @CO Coor 12345678901234 5678901234567890123456789012345
// @CO ref AGCATGTTAGATAA**GATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
// @CO --------------------------------------------------------
// @CO +r001/1 TTAGATAAAGGATA*CTG
// @CO +r002 aaaAGATAA*GGATA
// @CO +r003 gcctaAGCTAA
// @CO +r004 ATAGCT..............TCAGC
// @CO -r003 ttagctTAGGC
// @CO -r001/2 CAGCGGCAT
// @CO --------------------------------------------------------
// r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG *
// r002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA *
// r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0;
// r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC *
// r003 2064 ref 29 17 6H5M * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1;
// r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1
const (
refStart = 0
refEnd = 45
)
records := []*sam.Record{
{Name: "r001/1", Pos: 6, Cigar: []sam.CigarOp{
sam.NewCigarOp(sam.CigarMatch, 8),
sam.NewCigarOp(sam.CigarInsertion, 2),
sam.NewCigarOp(sam.CigarMatch, 4),
sam.NewCigarOp(sam.CigarDeletion, 1),
sam.NewCigarOp(sam.CigarMatch, 3),
}},
{Name: "r002", Pos: 8, Cigar: []sam.CigarOp{
sam.NewCigarOp(sam.CigarSoftClipped, 3),
sam.NewCigarOp(sam.CigarMatch, 6),
sam.NewCigarOp(sam.CigarPadded, 1),
sam.NewCigarOp(sam.CigarInsertion, 1),
sam.NewCigarOp(sam.CigarMatch, 4),
}},
{Name: "r003", Pos: 8, Cigar: []sam.CigarOp{
sam.NewCigarOp(sam.CigarSoftClipped, 5),
sam.NewCigarOp(sam.CigarMatch, 6),
}},
{Name: "r004", Pos: 15, Cigar: []sam.CigarOp{
sam.NewCigarOp(sam.CigarMatch, 6),
sam.NewCigarOp(sam.CigarSkipped, 14),
sam.NewCigarOp(sam.CigarMatch, 5),
}},
{Name: "r003", Pos: 28, Cigar: []sam.CigarOp{
sam.NewCigarOp(sam.CigarHardClipped, 6),
sam.NewCigarOp(sam.CigarMatch, 5),
}},
{Name: "r001/2", Pos: 36, Cigar: []sam.CigarOp{
sam.NewCigarOp(sam.CigarMatch, 9),
}},
}
for _, r := range records {
fmt.Printf("%q overlaps reference by %d letters\n", r.Name, Overlap(r, refStart, refEnd))
}
// Output:
//
// "r001/1" overlaps reference by 15 letters
// "r002" overlaps reference by 10 letters
// "r003" overlaps reference by 6 letters
// "r004" overlaps reference by 11 letters
// "r003" overlaps reference by 5 letters
// "r001/2" overlaps reference by 9 letters
}
|