File: test_cigar.py

package info (click to toggle)
python-cogent 2024.5.7a1%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 74,600 kB
  • sloc: python: 92,479; makefile: 117; sh: 16
file content (91 lines) | stat: -rw-r--r-- 3,166 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import unittest

from cogent3 import DNA, make_aligned_seqs
from cogent3.core.annotation_db import GffAnnotationDb
from cogent3.parse.cigar import (
    CigarParser,
    aligned_from_cigar,
    cigar_to_map,
    map_to_cigar,
    slice_cigar,
)


class TestCigar(unittest.TestCase):
    def setUp(self):
        self.cigar_text = "3D2M3D6MDM2D3MD"
        self.aln_seq = DNA.make_seq("---AA---GCTTAG-A--CCT-")
        self.aln_seq1 = DNA.make_seq("CCAAAAAA---TAGT-GGC--G")
        self.map, self.seq = self.aln_seq.parse_out_gaps()
        self.map1, self.seq1 = self.aln_seq1.parse_out_gaps()
        self.slices = [(1, 4), (0, 8), (7, 12), (0, 1), (3, 5)]
        self.aln = make_aligned_seqs(
            {"FAKE01": self.aln_seq, "FAKE02": self.aln_seq1}, array_align=False
        )
        self.cigars = {"FAKE01": self.cigar_text, "FAKE02": map_to_cigar(self.map1)}
        self.seqs = {"FAKE01": str(self.seq), "FAKE02": str(self.seq1)}

    def test_map_to_cigar(self):
        """convert a Map to cigar string"""
        assert map_to_cigar(self.map) == self.cigar_text

    def test_cigar_to_map(self):
        """test generating a Map from cigar"""
        map = cigar_to_map(self.cigar_text)
        assert str(map) == str(self.map)

    def test_aligned_from_cigar(self):
        """test generating aligned seq from cigar"""
        aligned_seq = aligned_from_cigar(self.cigar_text, self.seq)
        assert aligned_seq == self.aln_seq

    def test_slice_cigar(self):
        """test slicing cigars"""
        for start, end in self.slices:
            # test by_align = True
            map1, loc1 = slice_cigar(self.cigar_text, start, end)
            ori1 = self.aln_seq[start:end]
            if loc1:
                slicealn1 = self.seq[loc1[0] : loc1[1]].gapped_by_map(map1)
                assert ori1 == slicealn1
            else:
                assert len(map1) == len(ori1)

            # test by_align = False
            map2, loc2 = slice_cigar(self.cigar_text, start, end, by_align=False)
            slicealn2 = self.seq[start:end].gapped_by_map(map2)
            ori2 = self.aln_seq[loc2[0] : loc2[1]]
            assert slicealn2 == ori2

    def test_CigarParser(self):
        """test without slice"""
        aln = CigarParser(self.seqs, self.cigars)
        aln = aln.to_type(array_align=False)
        assert aln == self.aln
        # test slice
        db = GffAnnotationDb()
        aln.annotation_db = db
        i = 1
        for start, end in self.slices:
            db.add_feature(
                seqid="FAKE01", biotype=f"annot{i}", name="annot", spans=[(start, end)]
            )

            annot = list(aln.get_features(biotype=f"annot{i}"))
            annot = annot[0].union(annot[1:])
            slice_aln = annot.as_one_span().get_slice()
            i += 1

            cmp_aln = CigarParser(
                self.seqs,
                self.cigars,
                sliced=True,
                ref_seqname="FAKE01",
                start=start,
                end=end,
            )
            assert cmp_aln.to_dict() == slice_aln.to_dict(), (start, end)


if __name__ == "__main__":
    unittest.main()