# Tests for the dotplot helpers imported from cogent3.draw.dotplot.
from unittest import TestCase
import numpy
from cogent3 import DNA, make_unaligned_seqs
from cogent3.core.alignment import Aligned, ArrayAlignment
from cogent3.core.location import IndelMap
from cogent3.draw.dotplot import Dotplot, _convert_input, get_align_coords
class TestUtilFunctions(TestCase):
    """Tests for the dotplot helper functions and the Dotplot drawable."""

    def test_len_seq(self):
        """returns length of sequence minus gaps"""
        m, _ = DNA.make_seq("ACGGT--A").parse_out_gaps()
        self.assertEqual(m.parent_length, 6)

    def test_convert_input(self):
        """converts data for dotplotting"""
        m, seq = DNA.make_seq("ACGGT--A").parse_out_gaps()
        aligned_seq = Aligned(m, seq)
        # an Aligned instance yields the very same map and sequence objects
        mapped_gap, new_seq = _convert_input(aligned_seq, None)
        self.assertIs(new_seq.moltype, DNA)
        self.assertIs(mapped_gap, m)
        self.assertIs(new_seq, seq)
        # a raw string plus a moltype yields equivalent (not identical) objects
        mapped_gap, new_seq = _convert_input("ACGGT--A", DNA)
        self.assertEqual(str(mapped_gap), str(m))
        self.assertEqual(str(new_seq), str(seq))

    def test_get_align_coords(self):
        """correctly returns the alignment coordinates"""
        # ungapped sequence indices shown above each gapped sequence
        # 01234  5
        # ACGGT--A
        #   012345
        # --GGTTTA
        m1, seq1 = DNA.make_seq("ACGGT--A").parse_out_gaps()
        m2, seq2 = DNA.make_seq("--GGTTTA").parse_out_gaps()
        path = get_align_coords(m1, m2)
        expect = [2, 4, None, 5, 5], [0, 2, None, 5, 5]
        self.assertEqual(path.get_coords(), expect)

        # we have no gaps, so the coords will be empty
        m1, _ = seq1.parse_out_gaps()
        m2, _ = seq2.parse_out_gaps()
        path = get_align_coords(m1, m2)
        self.assertEqual(path.get_coords(), ([], []))

        # unless we indicate the seqs came from an Alignment
        m1, seq1 = DNA.make_seq("ACGGTTTA").parse_out_gaps()
        m2, seq2 = DNA.make_seq("GGGGTTTA").parse_out_gaps()
        paths = get_align_coords(m1, m2, aligned=True)
        # display ranges are inclusive, thus length - 1
        last = len(seq1) - 1
        self.assertEqual(paths.get_coords(), ([0, last], [0, last]))

        # raises an exception if the Aligned seqs are different lengths
        m1, seq1 = DNA.make_seq("ACGGTTTA").parse_out_gaps()
        m2, seq2 = DNA.make_seq("GGGGTT").parse_out_gaps()
        with self.assertRaises(AssertionError):
            get_align_coords(m1, m2, aligned=True)

    def test_display2d(self):
        """correctly constructs a Dotplot from aligned sequences"""
        # alignment positions (two-row ruler), then ungapped indices of the
        # first sequence, the two sequences, and ungapped indices of the second
        #           111111
        # 0123456789012345
        #            11111
        #  012345678901234
        # -TGATGTAAGGTAGTT
        # CTGG---AAG---GGT
        # 0123   456   789
        # check alignment coords are correct
        dp = Dotplot("-TGATGTAAGGTAGTT", "CTGG---AAG---GGT", is_aligned=True, window=5)
        expect = [0, 2, None, 6, 8, None, 12, 14], [1, 3, None, 4, 6, None, 7, 9]
        self.assertEqual(dp._aligned_coords.get_coords(), expect)

        dp._build_fig()
        traces = dp.traces
        self.assertEqual(len(traces), 2)  # no rev complement
        # we nudge alignment coordinates by 0.2 on x-axis
        expect = [0.2, 2.2, None, 6.2, 8.2, None, 12.2, 14.2]
        self.assertEqual(traces[-1].x, expect)
        self.assertEqual(traces[-1].name, "Alignment")
        self.assertEqual(traces[0].name, "+ strand")

        # check every trace coordinate is an integer/float/None
        allowed_types = {int, float, type(None), numpy.int64, numpy.int32}
        for trace in traces:
            for axis in "xy":
                got = {type(v) for v in trace[axis]}
                # subset comparison; reports both sets on failure
                self.assertLessEqual(got, allowed_types, trace[axis])

    def test_display2d_rc(self):
        """correctly constructs a Dotplot with rc"""
        # fail if specify rc but incompatible moltype
        seqs = make_unaligned_seqs(
            {"a": "-TGATGTAAGGTAGTT", "b": "CTGG---AAG---GGT"}, moltype="text"
        )
        with self.assertRaises(TypeError):
            Dotplot(*seqs.seqs, is_aligned=True, window=5, rc=True)

        seqs = make_unaligned_seqs(
            {"a": "-TGATGTAAGGTAGTT", "b": "CTGG---AAG---GGT"}, moltype="dna"
        )
        dp = Dotplot(*seqs.seqs, is_aligned=True, window=5, rc=True)
        dp._build_fig()
        traces = dp.traces
        # rc=True adds a "- strand" trace to the two produced without it
        self.assertEqual(len(traces), 3)
        self.assertEqual(traces[1].name, "- strand")

    def test_align_without_gaps(self):
        """dotplot has alignment coordinates if no gaps"""
        aln = ArrayAlignment(
            {"seq1": "ACGG", "seq2": "CGCA", "seq3": "CCG-"}, moltype="dna"
        )
        aln_plot = aln.dotplot("seq1")
        self.assertIsNotNone(aln_plot._aligned_coords)

    def test_dotplot_seqcoll(self):
        """dotplot sequence collection, gaps are removed"""
        seqs = make_unaligned_seqs(
            {"seq1": "ACGG", "seq2": "CGCA", "seq3": "CCG-"}, moltype="dna"
        )
        dp = seqs.dotplot("seq1", "seq3")
        self.assertIsNotNone(dp._aligned_coords)
        self.assertEqual(len(dp.seq1), 4)
        # the trailing gap of seq3 is not counted
        self.assertEqual(len(dp.seq2), 3)

    def test_dotplot_single(self):
        """dotplot with single sequence should not fail"""
        seqs = make_unaligned_seqs({"seq1": "CACACCACTGCAGTCGGATAGACC"}, moltype="dna")
        dp = seqs.dotplot(window=4, threshold=4, rc=True)
        self.assertEqual(dp.seq1, dp.seq2)

    def test_dotplot_missing(self):
        """fail if a sequence name not present"""
        seqs = make_unaligned_seqs(
            {"seq1": "ACGG", "seq2": "CGCA", "seq3": "CCG-"}, moltype="dna"
        )
        # second name missing, first name missing, both names missing
        missing_pairs = (("seq1", "seq5"), ("seq5", "seq1"), ("seq5", "seq6"))
        for name1, name2 in missing_pairs:
            with self.subTest(name1=name1, name2=name2):
                with self.assertRaises(ValueError):
                    _ = seqs.dotplot(name1, name2)

    def test_dotplot_title(self):
        """setting empty string title works"""
        seqs = make_unaligned_seqs(
            {"seq1": "ACGG", "seq2": "CGCA", "seq3": "CCG-"}, moltype="dna"
        )
        dp = seqs.dotplot("seq1", "seq3", title="")
        self.assertEqual(dp.figure.layout.title, "")
def test_aligned_path():
    """get_align_coords pairs a gapped map with an ungapped one as expected."""
    gapped = IndelMap(
        gap_pos=numpy.array([4, 5, 6, 8, 10]),
        cum_gap_lengths=numpy.array([6, 10, 12, 14, 15]),
        parent_length=10,
    )
    ungapped = IndelMap(
        gap_pos=numpy.array([], dtype=int),
        cum_gap_lengths=numpy.array([], dtype=int),
        parent_length=25,
    )
    # segments are separated by None in each coordinate list
    expected_first = [0, 3, None, 4, 4, None, 5, 5, None, 6, 7, None, 8, 9]
    expected_second = [0, 3, None, 10, 10, None, 15, 15, None, 18, 19, None, 22, 23]

    path = get_align_coords(gapped, ungapped)
    assert path.get_coords() == (expected_first, expected_second)
def test_dotplot_unaligned():
    """dotplot of an unaligned collection yields a non-empty first trace"""
    collection = make_unaligned_seqs({"a": "ACGGT", "b": "CGTT"}, moltype="dna")
    plot = collection.dotplot(window=3, k=2)
    assert plot
    # accessing the figure triggers building the traces
    _ = plot.figure
    first_trace = plot.traces[0]
    assert len(first_trace.x)
|