File: test-extensions-VCF.R

package info (click to toggle)
r-bioc-structuralvariantannotation 1.13.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,568 kB
  • sloc: makefile: 2
file content (557 lines) | stat: -rw-r--r-- 25,887 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
context('main functions')
example <- readVcf(.testfile("vcf4.2.example.sv.vcf"), "")
simple <- readVcf(.testfile("simple.vcf"), "")
breakend <- readVcf(.testfile("breakend.vcf"), "")
multipleAlleles <- readVcf(.testfile("multipleAltSVs.vcf"), "")
representations <- readVcf(.testfile("representations.vcf"))


breakdancer <- readVcf(.testfile("breakdancer-1.4.5.vcf"), "")
#cortex <- readVcf(.testfile("cortex-1.0.5.14.vcf"), "")
#crest <- readVcf(.testfile("crest.vcf"), "")
delly <- readVcf(.testfile("delly-0.6.8.vcf"), "")
#gasv <- readVcf(.testfile("gasv-20140228.vcf"), "")
gridss <- readVcf(.testfile("gridss.vcf"), "")
gridss_missing_partner <- readVcf(.testfile("gridss-missingPartner.vcf"), "")
#lumpy <- readVcf(.testfile("lumpy-0.2.11.vcf"), "")
pindel <- readVcf(.testfile("pindel-0.2.5b6.vcf"), "")
#socrates <- readVcf(.testfile("socrates-1.13.vcf"), "")
tigra <- readVcf(.testfile("tigra-0.3.7.vcf"), "")
manta <- readVcf(.testfile("manta-0.29.6.vcf"), "")
manta111 <- readVcf(.testfile("manta-1.1.1.vcf"), "")
longranger <- readVcf(.testfile("COLO829.10X.longranger.largeSVs.vcf"), "")

test_that("INFO column import", {
	gr <- breakpointRanges(simple, info_columns=c("SVTYPE", "MATEID"))
	expect_equal("BNDBF", as.character(gr["BNDFB"]$MATEID))

	gr <- breakpointRanges(.testrecord(c("chr10	2991435	INV	N	<INV>	.	LowQual	SVTYPE=INV;CHR2=chr1;END=19357517;CT=3to5")))
})
test_that("longranger UNK", {
	gr <- breakpointRanges(longranger)[c("call_2416_1", "call_2416_2")]
	expect_equal(as.character(strand(gr)), c("*", "*"))
})
test_that("longranger IMPRECISE_DIR", {
	gr <- breakpointRanges(longranger)[c("call_534_bp1", "call_534_bp2")]
	expect_equal(c(2632546-10, 2632545-10+57120), start(gr))
	expect_equal(as.character(strand(gr)), c("*", "*"))
})
test_that("Delly TRA", {
	# https://groups.google.com/forum/#!msg/delly-users/6Mq2juBraRY/BjmMrBh3GAAJ
	# Sorry, I forgot to post this to the delly-users list:
	# For a translocation, you have 2 double strand breaks, one on chrA and one on chrB.
	# This creates 4 "dangling" ends, chrA_left, chrA_right, chrB_left, chrB_right.
	# For a translocation you can join chrA_left with chrB_left (3to3), chrA_left with chrB_right (3to5),
	# chrA_right with chrB_left (5to3) and chrA_right with chrB_right (5to5).
	# In fact for a typical reciprocal translocation in prostate cancer (where two chromosomes exchange their end)
	# Delly calls 2 translocations at the breakpoint, one 3to5 and one 5to3. But obviously not all translocations are reciprocal.
	# -Tobias
    gr <- breakpointRanges(.testrecord(c("chr10	2991435	TRA00000001	N	<TRA>	.	LowQual	CIEND=0,100;CIPOS=0,50;SVTYPE=TRA;CHR2=chr1;END=19357517;CT=3to5")))
    expect_equal(2, length(gr))
    expect_equal(c(2991435, 19357517), start(gr))
    expect_equal(c(2991485, 19357617), end(gr))
    expect_equal(c("+", "-"), as.character(strand(gr)))
    expect_equal(c("chr10", "chr1"), as.character(seqnames(gr)))
    gr <- breakpointRanges(delly)
})
test_that("tigra CTX", {
	gr <- breakpointRanges(tigra[3,])
	expect_equal(c(102520604 - 10, 70284 - 10), start(gr))
	expect_equal(c(102520604 + 10, 70284 + 10), end(gr))
	expect_equal(c("*", "*"), as.character(strand(gr)))
    expect_equal(c("chr12", "chrUn_gl000223"), as.character(seqnames(gr)))
})
test_that("pindel RPL", {
	gr <- breakpointRanges(pindel[5,])
	expect_equal(c(1029142, 1029183), start(gr))
	expect_equal(c("+", "-"), as.character(strand(gr)))
	expect_equal(c(51, 51), gr$insLen)
})
test_that("empty VCF", {
	expect_equal(0, length(breakpointRanges(.testrecord(c()))))
})
test_that(".hasSingleAllelePerRecord", {
	expect_false(.hasSingleAllelePerRecord(multipleAlleles))
	expect_true(.hasSingleAllelePerRecord(expand(multipleAlleles)))
})
test_that("isSymbolic", {
	expect_equal(
	    c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE),
	    isSymbolic(simple))
})
test_that("isStructural", {
	expect_equal(
	    c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE),
	    isStructural(simple))
	expect_true(isStructural(.testrecord("chr1	1	.	ATT	AGGA	.	.	")))
	expect_false(isStructural(.testrecord("chr1	1	.	ATT	NNN	.	.	")))
})
test_that(".svLen", {
	expect_equal(c(0, 0, 1, -1, 1, -2, NA, NA, NA, NA), .svLen(simple))
	expect_equal(.svLen(.testrecord("chr1	100	.	A	<DEL>	.	.	SVLEN=-1")), c(-1))
	expect_equal(.svLen(.testrecord("chr1	100	.	A	<DUP>	.	.	END=101")), c(1))
})
# unpaired breakend
test_that("partner fails if missing mate", {
	expect_error(partner(breakpointRanges(simple)[1,]))
})
test_that("breakpointRanges convert to breakend pairs", {
	gr <- breakpointRanges(simple)
	pairId <- c("INS", "DEL", "SYMINS", "SYMDEL")
	expect_true(all(paste0(pairId, "_bp1") %in% names(gr)))
	expect_true(all(paste0(pairId, "_bp2") %in% names(gr)))
	expect_equal(names(partner(gr))[names(gr) %in% paste0(pairId, "_bp1")], paste0(pairId, "_bp2"))
	expect_equal(names(partner(gr))[names(gr) %in% c("BNDFB", "BNDBF")], c("BNDBF", "BNDFB"))
})
test_that("breakpointRanges creates placeholder names", {
	expect_warning(expect_named(breakpointRanges(.testrecord(c(
		"chr1	100	.	A	<DEL>	.	.	SVLEN=-1",
		"chr1	100	.	A	<DEL>	.	.	SVLEN=-1")))),
		regex="Found 1 duplicate row names")
})
test_that("breakpointRanges non-symbolic alleles", {
	gr <- breakpointRanges(simple[c("INS", "DEL"),])
	expect_equal(4, length(gr))

	gr <- breakpointRanges(.testrecord("chr1	1	.	ATT	AGGA	.	.	"))
	expect_equal(start(gr), c(1, 4))
	expect_equal(gr$insSeq, c("GGA", "GGA"))
	expect_equal(gr$svLen, c(1, 1))

	gr <- breakpointRanges(.testrecord("chr1	2	.	TTT	AGGA	.	.	"))
	expect_equal(start(gr), c(1, 5))
	expect_equal(gr$insSeq, c("AGGA", "AGGA"))
	expect_equal(gr$svLen, c(1, 1))

	gr <- breakpointRanges(.testrecord("chr1	2	.	AGT	AGGA	.	.	"))
	expect_equal(start(gr), c(3, 5))
	expect_equal(gr$insSeq, c("GA", "GA"))
	expect_equal(gr$insLen, c(2, 2))
	expect_equal(gr$svLen, c(1, 1))

	gr <- breakpointRanges(.testrecord("chr1	2	.	AGGA	AG	.	.	"))
	expect_equal(start(gr), c(3, 6))
	expect_equal(as.character(strand(gr)), c("+", "-"))
	expect_equal(gr$insLen, c(0, 0))
	expect_equal(gr$svLen, c(-2, -2))
})
test_that("breakpointRanges intervals", {
	# Position assumed to the left aligned
	gr <- breakpointRanges(.testrecord("chr1	100	.	A	AT	.	.	HOMLEN=0"))
	expect_equal(start(gr), c(100, 101))
	expect_equal(end(gr), c(100, 101))

	gr <- breakpointRanges(.testrecord("chr1	100	.	A	AT	.	.	HOMLEN=1"))
	expect_equal(start(gr), c(100, 101))
	expect_equal(end(gr), c(101, 102))

	gr <- breakpointRanges(.testrecord("chr1	100	.	A	AT	.	.	HOMLEN=2"))
	expect_equal(start(gr), c(100, 101))
	expect_equal(end(gr), c(102, 103))

	gr <- breakpointRanges(.testrecord("chr1	100	.	A	AT	.	.	HOMSEQ=AAAAAAAAAA"))
	expect_equal(start(gr), c(100, 101))
	expect_equal(end(gr), c(110, 111))

	gr <- breakpointRanges(.testrecord("chr1	100	.	A	AT	.	.	CIPOS=-5,10"))
	expect_equal(start(gr), c(95, 96))
	expect_equal(end(gr), c(110, 111))

	# CIPOS over homology
	gr <- breakpointRanges(.testrecord("chr1	100	.	A	AT	.	.	CIPOS=-5,10;HOMLEN=50;HOMESEQ=A"))
	expect_equal(start(gr), c(95, 96))
	expect_equal(end(gr), c(110, 111))

	# HOMLEN over HOMSEQ
	gr <- breakpointRanges(.testrecord("chr1	100	.	A	AT	.	.	HOMLEN=50;HOMESEQ=A"))
	expect_equal(start(gr), c(100, 101))
	expect_equal(end(gr), c(150, 151))
})
test_that("breakpointRanges DEL", {
	gr <- breakpointRanges(.testrecord("chr1	100	.	A	<DEL>	.	.	SVLEN=-1"))
	expect_equal(start(gr), c(100, 102))
	expect_equal(gr$insLen, c(0, 0))

	gr <- breakpointRanges(.testrecord("chr1	100	.	A	<DEL>	.	.	END=101"))
	expect_equal(start(gr), c(100, 102))

	breakpointRanges(.testrecord("chr1	100	.	A	<DEL:WITH:SUBTYPE>	.	.	END=101"))
	breakpointRanges(.testrecord("chr1	100	.	A	<DEL>	.	.	SVTYPE=DEL:WITH:SUBTYPE;END=101"))

	# warning about incompatable SVLEN and END fields
	#expect_warning(breakpointRanges(.testrecord("chr1	100	.	A	<DEL>	.	.	END=101;SVLEN=-10")), "SVLEN")
})
test_that("breakpointRanges should fix positive DEL event size", {
	gr <- breakpointRanges(.testrecord("chr1	100	.	A	<DEL>	.	.	SVLEN=10"))
	expect_equal(start(gr), c(100, 111))
	expect_equal(gr$insLen, c(0, 0))
})
test_that("breakpointRanges breakend", {
	expect_warning(gr <- breakpointRanges(breakend))
	expect_equal("parid_b", gr["parid_a",]$partner)
	expect_equal("mateid_b", gr["mateid_a",]$partner)
	expect_equal(partner(gr[c("parid_a", "parid_b"),]), gr[c("parid_b", "parid_a"),])
	expect_warning(breakpointRanges(breakend[c("mateid_a", "mateid_b", "multi_mateid")]), "Ignoring additional mate breakends")
	expect_warning(breakpointRanges(breakend[c("unpaired")]), "Removing [0-9]+ unpaired breakend variants")
	expect_equal(breakpointRanges(.testrecord(c(
		"chr1	100	a	N	N[chr1:105[	.	.	SVTYPE=BND;CIPOS=0,1;PARID=b",
		"chr1	105	b	N	]chr1:100]N	.	.	SVTYPE=BND;CIPOS=0,1;PARID=a"
	)))$svLen, c(-4, -4))
	expect_equal(breakpointRanges(.testrecord(c(
		"chr1	100	a	N	NAAAA[chr1:101[	.	.	SVTYPE=BND;CIPOS=0,1;PARID=b",
		"chr1	101	b	N	]chr1:100]TTTTN	.	.	SVTYPE=BND;CIPOS=0,1;PARID=a"
	)))$svLen, c(4, 4))
})
test_that("breakpointRanges INV", {
	# VCF example
	gr <- breakpointRanges(.testrecord("chr1	321682	INV0	T	<INV>	6	PASS	SVTYPE=INV;END=421681"))
	expect_equal(4, length(gr))
	expect_equal(start(gr), c(321682+1, 421681+1, 321682, 421681))
	expect_equal(as.character(strand(gr)), c("-", "-", "+", "+"))
	expect_equal(names(gr), c("INV0_bp1", "INV0_bp2", "INV0_bp3", "INV0_bp4"))

	gr <- breakpointRanges(.testrecord("chr1	321682	INV0	T	<INV>	6	PASS	SVTYPE=INV;END=421681;CIPOS=-2,1;CIEND=-3,4"))
	expect_equal(4, length(gr))
	expect_equal(start(gr), c(321682+1, 421681+1, 321682, 421681) + c(-2, -3, -2 ,-3))
	expect_equal(  end(gr), c(321682+1, 421681+1, 321682, 421681) + c( 1,  4,  1,  4))

	expect_error(breakpointRanges(.testrecord("chr1	321682	INV0	T	<INV>	6	PASS	SVTYPE=INV")))
})
test_that("breakpointRanges DUP", {
	# VCF example
	gr <- breakpointRanges(.testrecord(c(
		"chr1	12665100	.	A	<DUP>	14	PASS	SVTYPE=DUP;END=12686200;SVLEN=21100",
		"chr1	18665128	.	T	<DUP:TANDEM>	11	PASS	SVTYPE=DUP;END=18665204;SVLEN=76")))
	expect_equal(4, length(gr))
	expect_equal(start(gr), c(12665100+1, 18665128+1,
							  12686200, 18665204))
	expect_equal(as.character(strand(gr)), c("-", "-", "+", "+"))
	expect_equal(c(0,0,0,0), gr$insLen)

	gr <- breakpointRanges(.testrecord("chr1	12665100	.	A	<DUP>	14	PASS	SVTYPE=DUP;END=12686200;SVLEN=21100;CIPOS=-2,1;CIEND=-3,4"))
	expect_equal(2, length(gr))
	expect_equal(start(gr), c(12665100+1, 12686200) + c(-2,-3))
	expect_equal(  end(gr), c(12665100+1, 12686200) + c( 1, 4))

	expect_error(breakpointRanges(.testrecord("chr1	321682	.	T	<DUP>	.	.	SVTYPE=DUP")))

	gr <- breakpointRanges(.testrecord("chr12	5616362	chr12.5616362.DUP65536	A	<DUP>	.	.	SVLEN=65536;SVTYPE=DUP"))
	expect_equal(2, length(gr))
	expect_equal(start(gr), c(5616362+1, 5616362+65536))
	expect_equal(c("-", "+"), as.character(strand(gr)))
})

# test_that("manta merge should retain only unique events", {
# 	# VCF example
# 	gr <- breakpointRanges(manta)
# 	expect_equal(4, length(gr))
# })
test_that("manta 1.1.1", {
	gr <- breakpointRanges(manta111)
	expect_equal(2*10, length(gr))
})
test_that("manta INV3 should only have 1 breakpoint", {
	gr <- breakpointRanges(manta111[info(manta111)$INV3])
	expect_equal(c("+", "+"), as.character(strand(gr)))
})

test_that("nominalPosition should ignore confidence intervals", {
	# VCF example
	vcfExact <- .testrecord(c("chr1	100	.	A	<DEL>	14	PASS	SVTYPE=DEL;END=200"))
	vcfCI <- .testrecord(c("chr1	100	.	A	<DEL>	14	PASS	SVTYPE=DEL;END=200;CIEND=-10,10;CIPOS=-5,5"))
	expect_equal(breakpointRanges(vcfCI, nominalPosition=TRUE), breakpointRanges(vcfExact, nominalPosition=TRUE))
	expect_equal(ranges(breakpointRanges(vcfCI, nominalPosition=TRUE)), ranges(breakpointRanges(vcfExact, nominalPosition=TRUE)))
})
test_that("nominalPosition should ignore micro-homology", {
	# VCF example
	vcfExact <- .testrecord(c("chr1	100	.	A	<DEL>	14	PASS	SVTYPE=DEL;END=200"))
	vcfHOM <- .testrecord(c("chr1	100	.	A	<DEL>	14	PASS	SVTYPE=DEL;END=200;HOMLEN=15"))
	expect_equal(ranges(breakpointRanges(vcfHOM, nominalPosition=TRUE)), ranges(breakpointRanges(vcfExact, nominalPosition=TRUE)))
})
test_that("breakpointRanges should not include breakends", {
	expect_true(isSymbolic(.testrecord(c("chr1	100	.	A	AAA.	14	PASS	SVTYPE=BND"))))
	expect_equal(0, length(breakpointRanges(.testrecord(c("chr1	100	.	A	AAA.	14	PASS	SVTYPE=BND")))))
	expect_equal(0, length(breakpointRanges(.testrecord(c("chr1	1541062	gridss0_2065b	G	.GGTGGG	262.69	.	SVTYPE=BND")))))
})
test_that("breakendRanges should include breakends", {
	gr <- breakendRanges(.testrecord(c("chr1	100	.	A	TGC.	14	PASS	SVTYPE=BND")))
	expect_equal(1, length(gr))
	expect_equal("GC", gr$insSeq)
})
test_that("align_breakpoint should handle all orientations", {
	noname = function(x) { names(x) = NULL; return(x)}
	vcf = .testrecord(c(
		"chr1	1000	a	N	A[chr1:2000[	.	.	SVTYPE=BND;CIPOS=0,4;PARID=b",
		"chr1	2000	b	N	]chr1:1000]G	.	.	SVTYPE=BND;CIPOS=0,4;PARID=a"))
	vcf = align_breakpoints(vcf)
	expect_equal(c(1002, 2002), start(rowRanges(vcf)))
	expect_equal(c(-2,2, -2,2), noname(unlist(info(vcf)$CIPOS)))
	expect_equal(c("N[chr1:2002[", "]chr1:1002]N"), unlist(rowRanges(vcf)$ALT))
	expect_equal(c("+-", "-+"), .vcfAltToStrandPair(rowRanges(vcf)$ALT))

	vcf = .testrecord(c(
		"chr1	1000	a	N	]chr1:2000]A	.	.	SVTYPE=BND;CIPOS=0,4;PARID=b",
		"chr1	2000	b	N	G[chr1:1000[	.	.	SVTYPE=BND;CIPOS=0,4;PARID=a"))
	vcf = align_breakpoints(vcf)
	expect_equal(c(1002, 2002), start(rowRanges(vcf)))
	expect_equal(c(-2,2, -2,2), noname(unlist(info(vcf)$CIPOS)))
	expect_equal(c("]chr1:2002]N", "N[chr1:1002["), unlist(rowRanges(vcf)$ALT))
	expect_equal(c("-+", "+-"), .vcfAltToStrandPair(rowRanges(vcf)$ALT))

	vcf = .testrecord(c(
		"chr1	1000	a	N	A]chr1:2000]	.	.	SVTYPE=BND;CIPOS=-4,0;PARID=b",
		"chr1	2000	b	N	G]chr1:1000]	.	.	SVTYPE=BND;CIPOS=0,4;PARID=a"))
	vcf = align_breakpoints(vcf)
	expect_equal(c(998, 2002), start(rowRanges(vcf)))
	expect_equal(c(-2,2, -2,2), noname(unlist(info(vcf)$CIPOS)))
	expect_equal(c("N]chr1:2002]", "N]chr1:998]"), unlist(rowRanges(vcf)$ALT))
	expect_equal(c("++", "++"), .vcfAltToStrandPair(rowRanges(vcf)$ALT))

	vcf = .testrecord(c(
		"chr1	1000	a	N	[chr1:2000[A	.	.	SVTYPE=BND;CIPOS=-4,0;PARID=b",
		"chr1	2000	b	N	[chr1:1000[G	.	.	SVTYPE=BND;CIPOS=0,4;PARID=a"))
	vcf = align_breakpoints(vcf)
	expect_equal(c(998, 2002), start(rowRanges(vcf)))
	expect_equal(c(-2,2, -2,2), noname(unlist(info(vcf)$CIPOS)))
	expect_equal(c("[chr1:2002[N", "[chr1:998[N"), unlist(rowRanges(vcf)$ALT))
	expect_equal(c("--", "--"), .vcfAltToStrandPair(rowRanges(vcf)$ALT))
})
test_that("align_breakpoint centre should ensure odd length homology is consistent on both sides", {
	noname = function(x) { names(x) = NULL; return(x)}
	vcf = .testrecord(c(
		"chr1	1000	a	N	A[chr1:2000[	.	.	SVTYPE=BND;CIPOS=0,5;PARID=b",
		"chr1	2000	b	N	]chr1:1000]G	.	.	SVTYPE=BND;CIPOS=0,5;PARID=a"))
	vcf = align_breakpoints(vcf)
	expect_equal(c(1002, 2002), start(rowRanges(vcf)))
	expect_equal(c(-2,3, -2,3), noname(unlist(info(vcf)$CIPOS)))
	expect_equal(c("N[chr1:2002[", "]chr1:1002]N"), unlist(rowRanges(vcf)$ALT))

	vcf = .testrecord(c(
		"chr1	1000	a	N	]chr1:2000]A	.	.	SVTYPE=BND;CIPOS=0,5;PARID=b",
		"chr1	2000	b	N	G[chr1:1000[	.	.	SVTYPE=BND;CIPOS=0,5;PARID=a"))
	vcf = align_breakpoints(vcf)
	expect_equal(c(1002, 2002), start(rowRanges(vcf)))
	expect_equal(c(-2,3, -2,3), noname(unlist(info(vcf)$CIPOS)))
	expect_equal(c("]chr1:2002]N", "N[chr1:1002["), unlist(rowRanges(vcf)$ALT))

	vcf = .testrecord(c(
		"chr1	1000	a	N	A]chr1:2000]	.	.	SVTYPE=BND;CIPOS=-5,0;PARID=b",
		"chr1	2000	b	N	G]chr1:1000]	.	.	SVTYPE=BND;CIPOS=0,5;PARID=a"))
	vcf = align_breakpoints(vcf)
	expect_equal(c(998, 2002), start(rowRanges(vcf)))
	expect_equal(c(-3,2, -2,3), noname(unlist(info(vcf)$CIPOS)))
	expect_equal(c("N]chr1:2002]", "N]chr1:998]"), unlist(rowRanges(vcf)$ALT))

	vcf = .testrecord(c(
		"chr1	1000	a	N	[chr1:2000[A	.	.	SVTYPE=BND;CIPOS=-5,0;PARID=b",
		"chr1	2000	b	N	[chr1:1000[G	.	.	SVTYPE=BND;CIPOS=0,5;PARID=a"))
	vcf = align_breakpoints(vcf)
	expect_equal(c(998, 2002), start(rowRanges(vcf)))
	expect_equal(c(-3,2, -2,3), noname(unlist(info(vcf)$CIPOS)))
	expect_equal(c("[chr1:2002[N", "[chr1:998[N"), unlist(rowRanges(vcf)$ALT))
})
test_that("align_breakpoint should not touch other variants", {
	vcf = .testrecord(c(
		"chr1	1000	be1	N	AGT.	.	.	SVTYPE=BND;CIPOS=-5,0",
		"chr1	1000	b21	N	.AGT	.	.	SVTYPE=BND;CIPOS=-5,0",
		"chr1	1000	b21	N	<DEL>	.	.	SVTYPE=DEL;CIPOS=-5,0"))
	vcf = align_breakpoints(vcf)
	expect_equal(c("AGT.", ".AGT", "<DEL>"), unlist(rowRanges(vcf)$ALT))
})
test_that("breakpointRanges() should default to drop unpaired records.", {
	gr = expect_warning(breakpointRanges(gridss_missing_partner))
	expect_equal(2, length(gr))
})
test_that("breakpointRanges(inferMissingBreakends=TRUE) should add missing breakends.", {
	gr = breakpointRanges(gridss_missing_partner, inferMissingBreakends=TRUE)
	expect_equal(4, length(gr))
	expect_equal(c(18992158, 84963533, 84350, 4886681), start(gr))
	expect_equal(c("+", "-"), as.character(strand(gr))[1:2])
})

# CGTGTtgtagtaCCGTAA
#      -------       7bp del
# 0        1
# 123456789012345678

# Important symbolic allele info from the VCF specifications:
# 
# 1.6.1.4 If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String “<ID>”) then the padding base is required and POS denotes the coordinate of the base preceding the polymorphism. 
# 1.6.1.8 End reference position (1-based), indicating the variant spans positions POS–END on reference/contig CHROM. Normally this is the position of the last base in the REF allele
test_that("representations are equivalent", {
    bpgr = breakpointRanges(representations)
    expect_equal(rep(-7, 6), bpgr$svLen)
    expect_equal(rep(c(5, 13), 3), start(bpgr))
    expect_equal(rep(c(5, 13), 3), end(bpgr))
})

# TODO: VCFv4.4 support
# - filter DEL/DUP on SVCLAIM
# - Basic support for CNV?
# - Deprecate SVTYPE

# VCFv4.4 PR: CIEND deprecate - use CILEN
    # need to define how CILEN interacts with CIPOS - there are multiple possible interpretation
    # [CIPOS_start, CIPOS_end], [CIEND_start, CIEND_end] implies CILEN=[CIPOS_end-CIEND_start, CIEND_end, CIPOS_start]
    # but the actual CILEN could be less. Homology
    # bonus: usually doesn't need to be written!
# VCFv4.4 PR: HOMLEN deprecate and replace with HOMPOS
    # Exact calls shouldn't have a CIPOS - they should use HOMPOS instead.
# VCFv4.4 PR: Event/Type should be type=A, not type=1

# VCFv4.4 PR: line 196: ALT=<BND> is not valid

# VCFv4.4 PR: update the \subsection{Encoding Structural Variants} example
	# The SV example should not include non-reserved subtypes

# VCFv4.4 PR: line 208: reserve all IUPAC ambiguity codes
# VCFv4.4 PR: line 208: can symoblic alleles be mixed with non-symbolic ones?
# VCFv4.4 TODO: why is the FORMAT CN field TYPE=1 ? Is it just the CN? Why Integer, not Float?

test_that("VCFv4.4 use ALT instead of SVTYPE", {
	vcf44 = .testrecordv44(c(
		# we'll ignore that the mate orientations are actually incorrect for now - we're just testing missing SVTYPE
		"chrA	1000	bp1	N	A]chrA:2000]	.	.	PARID=bp2",
		"chrA	2000	bp2	N	]chrA:1000]G	.	.	PARID=bp1",
		"chrA	1000	bp4	N	[chrA:2000[A	.	.	PARID=bp4",
		"chrA	2000	bp3	N	G[chrA:1000[	.	.	PARID=bp3",
		"chrA	2000	be1	N	.G	.	.	",
		"chrA	2000	be2	N	.G	.	.	",
		"chrA	1	del	A	<DEL>	0	.	SVLEN=10;SVCLAIM=J",
		"chrA	1	dup	A	<DUP>	0	.	SVLEN=10;SVCLAIM=J",
		"chrA	1	ins	A	<INS>	0	.	SVLEN=10",
		"chrA	1	inv	A	<INV>	0	.	SVLEN=10"))
	bpgr = breakpointRanges(vcf44)
	begr = breakendRanges(vcf44)
	expect_equal(sort(c(
		"bp1", "bp2", "bp3", "bp4",
		"del_bp1", "dup_bp1", "ins_bp1", "inv_bp1",
		"del_bp2", "dup_bp2", "ins_bp2", "inv_bp2",
		"inv_bp3", "inv_bp4")), sort(names(bpgr)))
	expect_equal(sort(c("be1", "be2")), sort(names(begr)))
})
test_that("VCFv4.4 CNV & SVCLAIM", {
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	1	cnv_del	A	<DEL>	0	.	SVLEN=10;SVCLAIM=D",
		"chrA	1	cnv_dup	A	<DUP>	0	.	SVLEN=10;SVCLAIM=D",
		"chrA	1	cnv	A	<CNV>	0	.	SVLEN=10",
		"chrA	1	del_actual	A	<DEL>	0	.	SVLEN=10;SVCLAIM=DJ",
		"chrA	1	dup_actual	A	<DUP>	0	.	SVLEN=10;SVCLAIM=DJ",
		"chrA	1	del	A	<DEL>	0	.	SVLEN=10;SVCLAIM=J",
		"chrA	1	dup	A	<DUP>	0	.	SVLEN=10;SVCLAIM=J",
		"chrA	1	ins	A	<INS>	0	.	SVLEN=10")))
	expected_names = c("del_actual", "dup_actual", "del", "dup", "ins")
	expect_equal(c(paste0(expected_names, "_bp1"), paste0(expected_names, "_bp2")), names(bpgr))
})
test_that("VCFv4.4 SVLEN>END", {
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	1	end	A	<DEL>	0	.	SVCLAIM=J;END=3",
		"chrA	1	svlen	A	<DEL>	0	.	SVCLAIM=J;SVLEN=5",
		"chrA	1	both	A	<DEL>	0	.	SVCLAIM=J;END=10;SVLEN=5")))
	expect_equal(4, start(bpgr["end_bp2"]))
	expect_equal(7, start(bpgr["svlen_bp2"]))
	expect_equal(7, start(bpgr["both_bp2"]))
})
test_that("VCFv4.4 CIEND>CILEN DEL", {
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	10	both1	A	<DEL>	0	.	SVLEN=5;END=15;CIPOS=-2,2;CILEN=-2,2;CIEND=0,0",
		"chrA	10	both2	A	<DEL>	0	.	SVLEN=5;END=15;CIPOS=-2,2;CILEN=0,0;CIEND=-2,2",
		"chrA	10	cilen	A	<DEL>	0	.	SVLEN=7;CIPOS=-2,2;CILEN=-3,3", #END=17
		"chrA	10	ciend	A	<DEL>	0	.	END=14;CIPOS=-2,2;CIEND=-1,3",
		"chrA	10	inferred_end_bounds	A	<DEL>	0	.	SVLEN=7;CIPOS=-2,2")))
	expect_equal(c(10,16) + c(-2,  0), start(bpgr[c("both1_bp1", "both1_bp2")]))
	expect_equal(c(10,16) + c( 2,  0),   end(bpgr[c("both1_bp1", "both1_bp2")]))
	
	expect_equal(c(10,16) + c(-2, -2), start(bpgr[c("both2_bp1", "both2_bp2")]))
	expect_equal(c(10,16) + c( 2,  2),   end(bpgr[c("both2_bp1", "both2_bp2")]))
	
	# widest end bounds is [leftmost start & shortest, rightmost start + longest]
	expect_equal(c(10,18) + c(-2, -2-3), start(bpgr[c("cilen_bp1", "cilen_bp2")]))
	expect_equal(c(10,18) + c( 2, +2+3),   end(bpgr[c("cilen_bp1", "cilen_bp2")]))
	
	expect_equal(c(10,15) + c(-2, -1), start(bpgr[c("ciend_bp1", "ciend_bp2")]))
	expect_equal(c(10,15) + c( 2,  3),   end(bpgr[c("ciend_bp1", "ciend_bp2")]))
		
	# inferred end bounds should match start since SVLEN is known and fixed
	expect_equal(c(10,18) + c(-2, -2), start(bpgr[c("inferred_end_bounds_bp1", "inferred_end_bounds_bp2")]))
	expect_equal(c(10,18) + c( 2,  2),   end(bpgr[c("inferred_end_bounds_bp1", "inferred_end_bounds_bp2")]))
})
test_that("VCFv4.4 CIEND CILEN INV", {
	# inversion has a different codepath
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	10	inv1	A	<INV>	0	.	SVLEN=5;CIPOS=-1,1;CILEN=-2,2;CIEND=0,0",
		"chrA	10	inv2	A	<INV>	0	.	SVLEN=5;CIPOS=-1,1;CILEN=-2,2")))
	expect_equal(c(16, 15) + 0,   start(bpgr[c("inv1_bp2", "inv1_bp4")]))
	expect_equal(c(16, 15) + 0,     end(bpgr[c("inv1_bp2", "inv1_bp4")]))
	expect_equal(c(16, 15) + -1-2,start(bpgr[c("inv2_bp2", "inv2_bp4")]))
	expect_equal(c(16, 15) + 1+2,   end(bpgr[c("inv2_bp2", "inv2_bp4")]))
})
test_that("VCFv4.4 CIEND CILEN INS", {
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	10	inv1	A	<INS>	0	.	SVLEN=5;CIPOS=-1,1;CILEN=-2,2;CIEND=0,0",
		"chrA	10	inv2	A	<INS>	0	.	SVLEN=5;CIPOS=-1,1;CILEN=-2,2")))
	# CILEN doesn't impact INS bounds
	expect_equal(11 + c(0,-1),start(bpgr[c("inv1_bp2", "inv2_bp2")]))
	expect_equal(11 + c(0, 1),  end(bpgr[c("inv1_bp2", "inv2_bp2")]))
})
test_that("VCFv4.4 ALT symbolic subtypes", {
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	1	ins	A	<INS:ME:ALU>	0	.	SVLEN=5",
		"chrA	1	dup	A	<DUP:TANDEM>	0	.	SVLEN=3"
	)))
	expect_equal(1, start(bpgr["ins_bp1"]))
	expect_equal(1,   end(bpgr["ins_bp1"]))
	expect_equal(2, start(bpgr["ins_bp2"]))
	expect_equal(2,   end(bpgr["ins_bp2"]))
	expect_equal(2, start(bpgr["dup_bp1"]))
	expect_equal(4, start(bpgr["dup_bp2"]))
	expect_equal("-", as.character(strand(bpgr["dup_bp1"])))
	expect_equal("+", as.character(strand(bpgr["dup_bp2"])))
})
test_that("VCFv4.4 EVENT", {
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	1	e1	A	<DEL>	0	.	SVLEN=5;SVCLAIM=J;EVENT=complex;EVENTTYPE=chromothripsis",
		"chrA	10	e2	A	<DEL>	0	.	SVLEN=5;SVCLAIM=J;EVENT=complex;EVENTTYPE=chromothripsis",
		"chrA	20	untagged	A	<DEL>	0	.	SVLEN=5;SVCLAIM=J")))
	expect_equal(c("complex", "complex", NA_character_), bpgr[c("e1_bp1", "e2_bp1", "untagged_bp1")]$event)
})
test_that("VCFv4.4 non-SV symbolic alleles", {
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	1	sym1	A	<*>	0	.	END=10",
		"chrA	10	sym2	A	<NON_REF>	0	.	")))
	expect_equal(0, length(bpgr))
})
test_that("SVLEN", {
	bpgr = breakpointRanges(.testrecordv44(c(
		"chrA	2	ins	A	<INS>	0	.	SVLEN=3",
		"chrA	2	dup	A	<DUP>	0	.	SVLEN=3",
		"chrA	2	del	A	<DEL>	0	.	SVLEN=3",
		"chrA	2	inv	A	<INV>	0	.	SVLEN=3",
		"chrA	2	cnv	A	<CNV>	0	.	SVLEN=3")))
	expect_equal(end(bpgr)-start(bpgr), rep(0, length(bpgr)))
	expect_equal(c(2, 3), start(bpgr[c("ins_bp1", "ins_bp2")]))
	expect_equal(c(3, 5), start(bpgr[c("dup_bp1", "dup_bp2")]))
	expect_equal(c(2, 6), start(bpgr[c("del_bp1", "del_bp2")]))
	expect_equal(c(3, 6, 2, 5), start(bpgr[c("inv_bp1", "inv_bp2", "inv_bp3", "inv_bp4")]))
	
	expect_equal(c("+", "-"), as.character(strand(bpgr[c("ins_bp1", "ins_bp2")])))
	expect_equal(c("-", "+"), as.character(strand(bpgr[c("dup_bp1", "dup_bp2")])))
	expect_equal(c("+", "-"), as.character(strand(bpgr[c("del_bp1", "del_bp2")])))
	expect_equal(c("-", "-", "+", "+"), as.character(strand(bpgr[c("inv_bp1", "inv_bp2", "inv_bp3", "inv_bp4")])))
})
# test_that("VCFv4.4 support multiple SV ALT alleles", {
# 	bpgr = breakpointRanges(.testrecordv44(c(
# 		"chrA	10	multi	A	<DEL>,<DUP>	0	.	SVLEN=5,10;CIPOS=-1,1,-2,2;SVCLAIM=J")))
# 	expect_equal(4, length(bpgr))
# 	expect_equal(c("multi_bp1", "multi_bp2", "multi_alt2_bp1", "multi_alt2_bp2"), names(bpgr))
# 	expect_equal(c(10-1, 10+5-1), start(bpgr[c("multi_bp1", "multi_bp2")]))
# 	expect_equal(c(10+1, 10+5+1),   end(bpgr[c("multi_bp1", "multi_bp2")]))
# 	expect_equal(c(10-2, 10+10-2), start(bpgr[c("multi_alt2_bp1", "multi_alt2_bp2")]))
# 	expect_equal(c(10+2, 10+10+2),   end(bpgr[c("multi_alt2_bp1", "multi_alt2_bp2")]))
# })