File: overlap_example_test.go

package info (click to toggle)
golang-github-biogo-hts 1.1.0%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,660 kB
  • sloc: makefile: 3
file content (124 lines) | stat: -rw-r--r-- 3,551 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright ©2012 The bíogo Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package sam_test

import (
	"fmt"

	"github.com/biogo/hts/sam"
)

func min(a, b int) int {
	if a > b {
		return b
	}
	return a
}

func max(a, b int) int {
	if a < b {
		return b
	}
	return a
}

// Overlap returns the length of the overlap between the alignment
// of the SAM record and the interval specified.
//
// Note that this will count repeated matches to the same reference
// location if CigarBack operations are used.
func Overlap(r *sam.Record, start, end int) int {
	var overlap int
	pos := r.Pos
	for _, co := range r.Cigar {
		t := co.Type()
		con := t.Consumes()
		lr := co.Len() * con.Reference
		if con.Query == con.Reference {
			o := min(pos+lr, end) - max(pos, start)
			if o > 0 {
				overlap += o
			}
		}
		pos += lr
	}

	return overlap
}

func ExampleConsume() {
	// Example alignments from the SAM specification:
	//
	// @HD	VN:1.5	SO:coordinate
	// @SQ	SN:ref	LN:45
	// @CO	--------------------------------------------------------
	// @CO	Coor     12345678901234  5678901234567890123456789012345
	// @CO	ref      AGCATGTTAGATAA**GATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
	// @CO	--------------------------------------------------------
	// @CO	+r001/1        TTAGATAAAGGATA*CTG
	// @CO	+r002         aaaAGATAA*GGATA
	// @CO	+r003       gcctaAGCTAA
	// @CO	+r004                     ATAGCT..............TCAGC
	// @CO	-r003                            ttagctTAGGC
	// @CO	-r001/2                                        CAGCGGCAT
	// @CO	--------------------------------------------------------
	// r001	99	ref	7	30	8M2I4M1D3M	=	37	39	TTAGATAAAGGATACTG	*
	// r002	0	ref	9	30	3S6M1P1I4M	*	0	0	AAAAGATAAGGATA	*
	// r003	0	ref	9	30	5S6M	*	0	0	GCCTAAGCTAA	*	SA:Z:ref,29,-,6H5M,17,0;
	// r004	0	ref	16	30	6M14N5M	*	0	0	ATAGCTTCAGC	*
	// r003	2064	ref	29	17	6H5M	*	0	0	TAGGC	*	SA:Z:ref,9,+,5S6M,30,1;
	// r001	147	ref	37	30	9M	=	7	-39	CAGCGGCAT	*	NM:i:1

	const (
		refStart = 0
		refEnd   = 45
	)

	records := []*sam.Record{
		{Name: "r001/1", Pos: 6, Cigar: []sam.CigarOp{
			sam.NewCigarOp(sam.CigarMatch, 8),
			sam.NewCigarOp(sam.CigarInsertion, 2),
			sam.NewCigarOp(sam.CigarMatch, 4),
			sam.NewCigarOp(sam.CigarDeletion, 1),
			sam.NewCigarOp(sam.CigarMatch, 3),
		}},
		{Name: "r002", Pos: 8, Cigar: []sam.CigarOp{
			sam.NewCigarOp(sam.CigarSoftClipped, 3),
			sam.NewCigarOp(sam.CigarMatch, 6),
			sam.NewCigarOp(sam.CigarPadded, 1),
			sam.NewCigarOp(sam.CigarInsertion, 1),
			sam.NewCigarOp(sam.CigarMatch, 4),
		}},
		{Name: "r003", Pos: 8, Cigar: []sam.CigarOp{
			sam.NewCigarOp(sam.CigarSoftClipped, 5),
			sam.NewCigarOp(sam.CigarMatch, 6),
		}},
		{Name: "r004", Pos: 15, Cigar: []sam.CigarOp{
			sam.NewCigarOp(sam.CigarMatch, 6),
			sam.NewCigarOp(sam.CigarSkipped, 14),
			sam.NewCigarOp(sam.CigarMatch, 5),
		}},
		{Name: "r003", Pos: 28, Cigar: []sam.CigarOp{
			sam.NewCigarOp(sam.CigarHardClipped, 6),
			sam.NewCigarOp(sam.CigarMatch, 5),
		}},
		{Name: "r001/2", Pos: 36, Cigar: []sam.CigarOp{
			sam.NewCigarOp(sam.CigarMatch, 9),
		}},
	}

	for _, r := range records {
		fmt.Printf("%q overlaps reference by %d letters\n", r.Name, Overlap(r, refStart, refEnd))
	}

	// Output:
	//
	// "r001/1" overlaps reference by 15 letters
	// "r002" overlaps reference by 10 letters
	// "r003" overlaps reference by 6 letters
	// "r004" overlaps reference by 11 letters
	// "r003" overlaps reference by 5 letters
	// "r001/2" overlaps reference by 9 letters
}