File: sv_test.go

package info (click to toggle)
golang-github-brentp-vcfgo 0.0~git20250902.a31336c-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,940 kB
  • sloc: makefile: 5; sh: 1
file content (161 lines) | stat: -rw-r--r-- 6,360 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
package vcfgo_test

import (
	"io"
	"strings"
	"testing"

	"github.com/brentp/vcfgo"

	. "gopkg.in/check.v1"
)

// Test is the package's single "go test" entry point; it hands the
// *testing.T to gocheck, which then runs the suites registered via Suite.
func Test(t *testing.T) {
	TestingT(t)
}

// cnvStr is a VCFv4.1 fixture holding nine structural-variant records for a
// single sample (NA00001): symbolic <DEL>, <DEL:ME:ALU>, <INS:ME:L1>, <DUP>,
// <DUP:TANDEM>, <INV> and <CNV> alleles, a <DEL> record whose INFO carries no
// END/SVLEN/CIPOS/CIEND, and a breakend (]1:759001]N). Fields are
// tab-separated; the literal is consumed verbatim by CNVSuite.
var cnvStr = `##fileformat=VCFv4.1
##fileDate=20100501
##reference=1000GenomesPilot-NCBI36
##assembly=ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/release/sv/breakpoint_assemblies.fasta
##INFO=<ID=BKPTID,Number=.,Type=String,Description="ID of the assembled alternate allele in the assembly file">
##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
##ALT=<ID=DEL:ME:L1,Description="Deletion of L1 element">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=DUP:TANDEM,Description="Tandem Duplication">
##ALT=<ID=INS,Description="Insertion of novel sequence">
##ALT=<ID=INS:ME:ALU,Description="Insertion of ALU element">
##ALT=<ID=INS:ME:L1,Description="Insertion of L1 element">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=CNV,Description="Copy number variable region">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype quality">
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001
2	321682	.	T	<DEL>	6	PASS	SVTYPE=DEL;END=321887;SVLEN=-205;CIPOS=-56,20;CIEND=-10,62	GT:GQ	0/1:12
2	14477084	.	C	<DEL:ME:ALU>	12	PASS	SVTYPE=DEL;END=14477381;CIPOS=-22,18;CIEND=-12,32	GT:GQ	0/1:12
3	9425916	.	C	<INS:ME:L1>	23	PASS	SVTYPE=INS;SVLEN=6027;CIPOS=-16,22	GT:GQ	1/1:15
3	12665100	.	A	<DUP>	14	PASS	SVTYPE=DUP;END=12686200;SVLEN=21100;CIPOS=-500,500;CIEND=-500,500	GT:GQ:CN:CNQ	./.:0:3:16.2
4	18665128	.	T	<DUP:TANDEM>	11	PASS	SVTYPE=DUP;END=18665204;SVLEN=76;CIPOS=-10,10;CIEND=-10,10	GT:GQ:CN:CNQ	./.:0:5:8.3
4	18665128	.	T	<INV>	11	PASS	SVTYPE=INS;END=18665204;SVLEN=76;CIPOS=-10,10;CIEND=-10,10	GT:GQ:CN:CNQ	./.:0:5:8.3
4	18665128	.	T	<CNV>	11	PASS	SVTYPE=DUP;END=18665204;SVLEN=76;CIPOS=-10,10;CIEND=-10,10	GT:GQ:CN:CNQ	./.:0:5:8.3
4	43266825	rs369548269	T	<DEL>	.	.	ON=0;OLD_MULTIALLELIC=4:43266825:T/TAC/<DEL>	GT	0/1
5	755892	175981_1	N	]1:759001]N	112.69	PASS	AC=4;AF=0.01361;ALG=PROD;AN=294;CIEND=-10,29;CIEND95=0,34;CIPOS=-30,29;CIPOS95=28,32;EVENT=175981;EXVAR=0.00508;HW=0.00;HetPct=2.7;HomRefPct=97.3;HomVarPct=0.0;IMPRECISE;MATEID=1	GT	0/1`

//1	2827694	rs2376870	CGTGGATGCGGGGAC	C	.	PASS	SVTYPE=DEL;END=2827762;HOMLEN=1;HOMSEQ=G;SVLEN=-68	GT:GQ	1/1:13.9

// CNVSuite is the gocheck suite for the structural-variant tests. It carries
// the VCF text to parse and a reader over that text.
type CNVSuite struct {
	// reader is the input handed to vcfgo.NewReader; SetUpTest replaces it
	// with a new strings.Reader over vcfStr.
	reader io.Reader
	// vcfStr is the complete VCF document (header and records) under test.
	vcfStr string
}

// Register the suite with gocheck, using the cnvStr fixture as its VCF text.
var _ = Suite(&CNVSuite{vcfStr: cnvStr})

// SetUpTest is the gocheck per-test fixture hook. It points s.reader at a new
// strings.Reader over s.vcfStr so the test parses the text from its start.
func (s *CNVSuite) SetUpTest(c *C) {
	fresh := strings.NewReader(s.vcfStr)
	s.reader = fresh
}

// TestDupIns parses the suite's VCF text with vcfgo and walks its nine
// records in file order, asserting the start/end coordinates reported for
// each one and, for the first and eighth records, the CIPos/CIEnd results.
//
// Every Read is followed by a NotNil assertion so that a record that fails
// to parse is reported as a test failure at that record rather than as a
// nil-pointer panic on the next method call.
func (s *CNVSuite) TestDupIns(c *C) {
	r, err := vcfgo.NewReader(s.reader, false)
	c.Assert(err, IsNil)
	var v *vcfgo.Variant

	// Record 1: <DEL> at POS 321682 with END, CIPOS=-56,20 and CIEND=-10,62.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.End()), Equals, 321887)

	// Expected interval is POS+CIPOS with the left edge shifted by one.
	left, right, ok := v.CIPos()
	c.Assert(ok, Equals, true)
	c.Assert(int(left), Equals, 321682-56-1)
	c.Assert(int(right), Equals, 321682+20)

	// Same shape for the end: END+CIEND with the left edge shifted by one.
	eleft, eright, ok := v.CIEnd()
	c.Assert(ok, Equals, true)
	c.Assert(int(eleft), Equals, 321887-10-1)
	c.Assert(int(eright), Equals, 321887+62)

	// Record 2: <DEL:ME:ALU> with END.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.End()), Equals, 14477381)

	// Record 3: <INS:ME:L1> at POS 9425916 with SVLEN=6027 and no END; the
	// expected end is POS + SVLEN.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.Start()), Equals, 9425915)
	c.Assert(int(v.End()), Equals, 9431943)

	// Record 4: <DUP> with END.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.End()), Equals, 12686200)

	// Record 5: <DUP:TANDEM> with END.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.End()), Equals, 18665204)

	// Record 6: <INV> (the fixture labels it SVTYPE=INS) with END.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.End()), Equals, 18665204)

	// Record 7: <CNV> with END.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.End()), Equals, 18665204)

	// Record 8: <DEL> at POS 43266825 whose INFO has no END, SVLEN, CIPOS or
	// CIEND; the expected end equals POS.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.End()), Equals, 43266825)

	// Without CIPOS, CIPos is expected to report ok=false and the interval
	// [Start, Start+1].
	left, right, ok = v.CIPos()
	c.Assert(left, Equals, v.Start())
	c.Assert(right, Equals, v.Start()+uint32(1))
	c.Assert(ok, Equals, false)

	// Without CIEND, CIEnd is expected to report ok=false and the interval
	// [End-1, End].
	eleft, eright, ok = v.CIEnd()
	c.Assert(eleft, Equals, v.End()-1)
	c.Assert(eright, Equals, v.End())
	c.Assert(ok, Equals, false)

	// Record 9: breakend (ALT ]1:759001]N) at POS 755892; expected to span a
	// single base.
	v = r.Read()
	c.Assert(v, NotNil)
	c.Assert(int(v.Start()), Equals, 755891)
	c.Assert(int(v.End()), Equals, 755891+1)
}

// svlenNumberAStr is a VCFv4.1 fixture whose SVLEN INFO header is declared
// with Number=A. It holds two tab-separated <DEL> records without an END
// field: one with a single ALT allele (SVLEN=100) and one with two ALT
// alleles (SVLEN=150,200). The literal is consumed verbatim by
// SVLENNumberASuite.
var svlenNumberAStr = `##fileformat=VCFv4.1
##fileDate=20100501
##reference=1000GenomesPilot-NCBI36
##assembly=ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/release/sv/breakpoint_assemblies.fasta
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##ALT=<ID=DEL,Description="Deletion">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001
1	1000	.	T	<DEL>	60	PASS	SVTYPE=DEL;SVLEN=100	GT	0/1
1	2000	.	A	<DEL>,<DEL>	60	PASS	SVTYPE=DEL;SVLEN=150,200	GT	1/2
`

// SVLENNumberASuite is the gocheck suite for records whose SVLEN header uses
// Number=A. It carries the VCF text to parse and a reader over that text.
type SVLENNumberASuite struct {
	// reader is the input handed to vcfgo.NewReader; SetUpTest replaces it
	// with a new strings.Reader over vcfStr.
	reader io.Reader
	// vcfStr is the complete VCF document (header and records) under test.
	vcfStr string
}

// Register the suite with gocheck, using the svlenNumberAStr fixture as its
// VCF text.
var _ = Suite(&SVLENNumberASuite{vcfStr: svlenNumberAStr})

// SetUpTest is the gocheck per-test fixture hook. It points s.reader at a new
// strings.Reader over s.vcfStr so the test parses the text from its start.
func (s *SVLENNumberASuite) SetUpTest(c *C) {
	fresh := strings.NewReader(s.vcfStr)
	s.reader = fresh
}

// TestSVLENNumberA parses the suite's VCF text and checks, record by record,
// the start and end coordinates reported when SVLEN is declared with
// Number=A, for both a single-ALT and a two-ALT record.
func (s *SVLENNumberASuite) TestSVLENNumberA(c *C) {
	r, err := vcfgo.NewReader(s.reader, false)
	c.Assert(err, IsNil)

	// Expected coordinates, one entry per fixture record in file order.
	expected := []struct {
		start, end int
	}{
		{start: 999, end: 1100},  // single ALT: POS 1000 is 0-based 999; end is 1000 + 100
		{start: 1999, end: 2150}, // two ALTs: POS 2000 is 0-based 1999; end is 2000 + 150 (first ALT)
	}

	for _, want := range expected {
		v := r.Read()
		c.Assert(v, NotNil)
		c.Assert(int(v.Start()), Equals, want.start)
		c.Assert(int(v.End()), Equals, want.end)
	}
}