# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.


"""Tests for mapping pairwise alignments."""

import os
import random
import unittest

try:
    import numpy as np
except ImportError:
    from Bio import MissingPythonDependencyError

    raise MissingPythonDependencyError(
        "Install numpy if you want to use Bio.Align.Alignment.map."
    ) from None

from Bio import Align
from Bio import SeqIO
from Bio.Align import Alignment
from Bio.Align import PairwiseAligner
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord


class TestSimple(unittest.TestCase):
    aligner = PairwiseAligner()
    aligner.internal_open_gap_score = -1
    aligner.internal_extend_gap_score = -0.0
    aligner.match_score = +1
    aligner.mismatch_score = -1
    aligner.mode = "local"

    def test_internal(self):
        aligner = self.aligner
        chromosome = Seq("AAAAAAAAAAAAGGGGGGGCCCCCGGGGGGAAAAAAAAAA")
        chromosome.id = "chromosome"
        transcript = Seq("GGGGGGGCCCCCGGGGGGA")
        transcript.id = "transcript"
        sequence = Seq("GGCCCCCGGG")
        sequence.id = "sequence"
        alignments1 = aligner.align(chromosome, transcript)
        self.assertEqual(len(alignments1), 1)
        alignment1 = alignments1[0]
        self.assertTrue(
            np.array_equal(alignment1.coordinates, np.array([[12, 31], [0, 19]]))
        )
        self.assertEqual(
            str(alignment1),
            """\
chromosom        12 GGGGGGGCCCCCGGGGGGA 31
                  0 ||||||||||||||||||| 19
transcrip         0 GGGGGGGCCCCCGGGGGGA 19
""",
        )
        alignments2 = aligner.align(transcript, sequence)
        self.assertEqual(len(alignments2), 1)
        alignment2 = alignments2[0]
        self.assertTrue(
            np.array_equal(alignment2.coordinates, np.array([[5, 15], [0, 10]]))
        )
        self.assertEqual(
            str(alignment2),
            """\
transcrip         5 GGCCCCCGGG 15
                  0 |||||||||| 10
sequence          0 GGCCCCCGGG 10
""",
        )
        alignment = alignment1.map(alignment2)
        self.assertTrue(
            np.array_equal(alignment.coordinates, np.array([[17, 27], [0, 10]]))
        )
        self.assertEqual(
            str(alignment),
            """\
chromosom        17 GGCCCCCGGG 27
                  0 |||||||||| 10
sequence          0 GGCCCCCGGG 10
""",
        )
        line = format(alignment, "psl")
        self.assertEqual(
            line,
            """\
10	0	0	0	0	0	0	0	+	sequence	10	0	10	chromosome	40	17	27	1	10,	0,	17,
""",
        )

    def test_left_overhang(self):
        aligner = self.aligner
        chromosome = Seq("GGGCCCCCGGGGGGAAAAAAAAAA")
        chromosome.id = "chromosome"
        transcript = Seq("AGGGGGCCCCCGGGGGGA")
        transcript.id = "transcript"
        sequence = Seq("GGGGGCCCCCGGG")
        sequence.id = "sequence"
        alignments1 = aligner.align(chromosome, transcript)
        self.assertEqual(len(alignments1), 1)
        alignment1 = alignments1[0]
        self.assertEqual(
            str(alignment1),
            """\
chromosom         0 GGGCCCCCGGGGGGA 15
                  0 ||||||||||||||| 15
transcrip         3 GGGCCCCCGGGGGGA 18
""",
        )
        alignments2 = aligner.align(transcript, sequence)
        self.assertEqual(len(alignments2), 1)
        alignment2 = alignments2[0]
        self.assertEqual(
            str(alignment2),
            """\
transcrip         1 GGGGGCCCCCGGG 14
                  0 ||||||||||||| 13
sequence          0 GGGGGCCCCCGGG 13
""",
        )
        alignment = alignment1.map(alignment2)
        self.assertTrue(
            np.array_equal(alignment.coordinates, np.array([[0, 11], [2, 13]]))
        )
        self.assertEqual(
            str(alignment),
            """\
chromosom         0 GGGCCCCCGGG 11
                  0 ||||||||||| 11
sequence          2 GGGCCCCCGGG 13
""",
        )
        line = format(alignment, "psl")
        self.assertEqual(
            line,
            """\
11	0	0	0	0	0	0	0	+	sequence	13	2	13	chromosome	24	0	11	1	11,	2,	0,
""",
        )

    def test_right_overhang(self):
        aligner = self.aligner
        chromosome = Seq("AAAAAAAAAAAAGGGGGGGCCCCCGGG")
        chromosome.id = "chromosome"
        transcript = Seq("GGGGGGGCCCCCGGGGGGA")
        transcript.id = "transcript"
        sequence = Seq("GGCCCCCGGGGG")
        sequence.id = "sequence"
        alignments1 = aligner.align(chromosome, transcript)
        self.assertEqual(len(alignments1), 1)
        alignment1 = alignments1[0]
        self.assertEqual(
            str(alignment1),
            """\
chromosom        12 GGGGGGGCCCCCGGG 27
                  0 ||||||||||||||| 15
transcrip         0 GGGGGGGCCCCCGGG 15
""",
        )
        alignments2 = aligner.align(transcript, sequence)
        self.assertEqual(len(alignments2), 1)
        alignment2 = alignments2[0]
        self.assertEqual(
            str(alignment2),
            """\
transcrip         5 GGCCCCCGGGGG 17
                  0 |||||||||||| 12
sequence          0 GGCCCCCGGGGG 12
""",
        )
        alignment = alignment1.map(alignment2)
        self.assertTrue(
            np.array_equal(alignment.coordinates, np.array([[17, 27], [0, 10]]))
        )
        self.assertEqual(
            str(alignment),
            """\
chromosom        17 GGCCCCCGGG 27
                  0 |||||||||| 10
sequence          0 GGCCCCCGGG 10
""",
        )
        line = format(alignment, "psl")
        self.assertEqual(
            line,
            """\
10	0	0	0	0	0	0	0	+	sequence	12	0	10	chromosome	27	17	27	1	10,	0,	17,
""",
        )

    def test_reverse_transcript(self):
        aligner = self.aligner
        chromosome = Seq("AAAAAAAAAAAAGGGGGGGCCCCCGGGGGGAAAAAAAAAA")
        chromosome.id = "chromosome"
        transcript = Seq("TCCCCCCGGGGGCCCCCCC")
        transcript.id = "transcript"
        sequence = Seq("GGCCCCCGGG")
        sequence.id = "sequence"
        alignments1 = aligner.align(chromosome, transcript, strand="-")
        self.assertEqual(len(alignments1), 1)
        alignment1 = alignments1[0]
        self.assertTrue(
            np.array_equal(alignment1.coordinates, np.array([[12, 31], [19, 0]]))
        )
        self.assertEqual(
            str(alignment1),
            """\
chromosom        12 GGGGGGGCCCCCGGGGGGA 31
                  0 ||||||||||||||||||| 19
transcrip        19 GGGGGGGCCCCCGGGGGGA  0
""",
        )
        alignments2 = aligner.align(transcript, sequence, strand="-")
        self.assertEqual(len(alignments2), 1)
        alignment2 = alignments2[0]
        self.assertTrue(
            np.array_equal(alignment2.coordinates, np.array([[4, 14], [10, 0]]))
        )
        self.assertEqual(
            str(alignment2),
            """\
transcrip         4 CCCGGGGGCC 14
                  0 |||||||||| 10
sequence         10 CCCGGGGGCC  0
""",
        )
        alignment = alignment1.map(alignment2)
        self.assertTrue(
            np.array_equal(alignment.coordinates, np.array([[17, 27], [0, 10]]))
        )
        self.assertEqual(
            str(alignment),
            """\
chromosom        17 GGCCCCCGGG 27
                  0 |||||||||| 10
sequence          0 GGCCCCCGGG 10
""",
        )
        line = format(alignment, "psl")
        self.assertEqual(
            line,
            """\
10	0	0	0	0	0	0	0	+	sequence	10	0	10	chromosome	40	17	27	1	10,	0,	17,
""",
        )

    def test_reverse_sequence(self):
        aligner = self.aligner
        chromosome = Seq("AAAAAAAAAAAAGGGGGGGCCCCCGGGGGGAAAAAAAAAA")
        chromosome.id = "chromosome"
        transcript = Seq("GGGGGGGCCCCCGGGGGGA")
        transcript.id = "transcript"
        sequence = Seq("CCCGGGGGCC")
        sequence.id = "sequence"
        alignments1 = aligner.align(chromosome, transcript)
        self.assertEqual(len(alignments1), 1)
        alignment1 = alignments1[0]
        self.assertTrue(
            np.array_equal(alignment1.coordinates, np.array([[12, 31], [0, 19]]))
        )
        self.assertEqual(
            str(alignment1),
            """\
chromosom        12 GGGGGGGCCCCCGGGGGGA 31
                  0 ||||||||||||||||||| 19
transcrip         0 GGGGGGGCCCCCGGGGGGA 19
""",
        )
        alignments2 = aligner.align(transcript, sequence, "-")
        self.assertEqual(len(alignments2), 1)
        alignment2 = alignments2[0]
        self.assertTrue(
            np.array_equal(alignment2.coordinates, np.array([[5, 15], [10, 0]]))
        )
        self.assertEqual(
            str(alignment2),
            """\
transcrip         5 GGCCCCCGGG 15
                  0 |||||||||| 10
sequence         10 GGCCCCCGGG  0
""",
        )
        alignment = alignment1.map(alignment2)
        self.assertTrue(
            np.array_equal(alignment.coordinates, np.array([[17, 27], [10, 0]]))
        )
        self.assertEqual(
            str(alignment),
            """\
chromosom        17 GGCCCCCGGG 27
                  0 |||||||||| 10
sequence         10 GGCCCCCGGG  0
""",
        )
        line = format(alignment, "psl")
        self.assertEqual(
            line,
            """\
10	0	0	0	0	0	0	0	-	sequence	10	0	10	chromosome	40	17	27	1	10,	0,	17,
""",
        )

    def test_reverse_transcript_sequence(self):
        aligner = self.aligner
        chromosome = Seq("AAAAAAAAAAAAGGGGGGGCCCCCGGGGGGAAAAAAAAAA")
        chromosome.id = "chromosome"
        transcript = Seq("TCCCCCCGGGGGCCCCCCC")
        transcript.id = "transcript"
        sequence = Seq("CCCGGGGGCC")
        sequence.id = "sequence"
        alignments1 = aligner.align(chromosome, transcript, "-")
        self.assertEqual(len(alignments1), 1)
        alignment1 = alignments1[0]
        self.assertTrue(
            np.array_equal(alignment1.coordinates, np.array([[12, 31], [19, 0]]))
        )
        self.assertEqual(
            str(alignment1),
            """\
chromosom        12 GGGGGGGCCCCCGGGGGGA 31
                  0 ||||||||||||||||||| 19
transcrip        19 GGGGGGGCCCCCGGGGGGA  0
""",
        )
        alignments2 = aligner.align(transcript, sequence)
        self.assertEqual(len(alignments2), 1)
        alignment2 = alignments2[0]
        self.assertTrue(
            np.array_equal(alignment2.coordinates, np.array([[4, 14], [0, 10]]))
        )
        self.assertEqual(
            str(alignment2),
            """\
transcrip         4 CCCGGGGGCC 14
                  0 |||||||||| 10
sequence          0 CCCGGGGGCC 10
""",
        )
        alignment = alignment1.map(alignment2)
        self.assertTrue(
            np.array_equal(alignment.coordinates, np.array([[17, 27], [10, 0]]))
        )
        self.assertEqual(
            str(alignment),
            """\
chromosom        17 GGCCCCCGGG 27
                  0 |||||||||| 10
sequence         10 GGCCCCCGGG  0
""",
        )
        line = format(alignment, "psl")
        self.assertEqual(
            line,
            """\
10	0	0	0	0	0	0	0	-	sequence	10	0	10	chromosome	40	17	27	1	10,	0,	17,
""",
        )


class TestComplex(unittest.TestCase):
    aligner = PairwiseAligner()
    aligner.internal_open_gap_score = -1
    aligner.internal_extend_gap_score = -0.0
    aligner.match_score = +1
    aligner.mismatch_score = -1
    aligner.mode = "local"

    def test1(self):
        aligner = self.aligner
        chromosome = Seq(
            "GCCTACCGTATAACAATGGTTATAATACAAGGCGGTCATAATTAAAGGGAGTGCAGCAACGGCCTGCTCTCCAAAAAAACAGGTTTTATGAAAAGAAAGTGCATTAACTGTTAAAGCCGTCATATCGGTGGGTTCTGCCAGTCACCGGCATACGTCCTGGGACAAAGACTTTTTACTACAATGCCAGGCGGGAGAGTCACCCGCCGCGGTGTCGACCCAGGGGACAGCGGGAAGATGTCGTGGTTTCCTTGTCATTAACCAACTCCATCTTAAAAGCTCCTCTAGCCATGGCATGGTACGTTGCGCGCACCCTTTTATCGGTAAGGCGCGGTGACTCTCTCCCAAAACAGTGCCATAATGGTTCGCTTCCTACCTAAGGCACTTACGGCCAATTAATGCGCAAGCGAGCGGAAGGTCTAACAGGGCACCGAATTCGATTA"
        )
        chromosome.id = "chromosome"
        transcript = Seq(
            "GGAATTTTAGCAGCCAAAGGACGGATCCTCCAAGGGGCCCCAGCACAGCACATTTTTAACGCGAACTAAGCGGGAGCGCATGTGGGACAGTTGATCCCATCCGCCTCAAAATTTCTCGCAATATCGGTTGGGGCACAGGTCCACTTTACGAATTCATACCGTGGTAGAGACCTTTATTAGATAGATATGACTGTTTGATTGCGGCATAGTACGACGAAGCAAGGGGATGGACGTTTCGGTTGCATTCGACCGGGTTGGGTCGAAAAACAGGTTTTATGAAAAGAAAGTGCATTAACTGTTAAAGCCGTCATATCGGTGGGTTC"
        )
        transcript.id = "transcript"
        sequence = Seq(
            "TCCAAGGGGCCCCAGCACAGCACATTTTTAACGCGGGGACAGTTGATCCCATCCGCCTTTTACGAATTCATACCGTGGTAGGCGGCATAGTACGACGAAGCGGTTGGGTCGAAAAACAGGTTGCCGTCATATCGGTGGGTTC"
        )
        sequence.id = "sequence"
        alignments1 = aligner.align(chromosome, transcript)
        alignment1 = alignments1[0]
        self.assertEqual(alignment1.coordinates.shape[1], 164)
        self.assertEqual(
            str(alignment1),
            """\
chromosom        14 AATGGTTATA------ATACAAGG-CGG----TCATAATTAAAGGGAGTG---CAGCAAC
                  0 |||--||-||------|---||||-|||----||------.|||||---|---|||||--
transcrip         2 AAT--TT-TAGCAGCCA---AAGGACGGATCCTC------CAAGGG---GCCCCAGCA--

chromosom        60 GGCCTGCTCTCCAAAAAAACAGGTTTTATGAAAAGAAAGTGCATTAACTGTTAAAGC---
                 60 ---|.||-----------|||--||||-|------||.|.|----||||----||||---
transcrip        45 ---CAGC-----------ACA--TTTT-T------AACGCG----AACT----AAGCGGG

chromosom       117 --CGTCATATCGGTGG----GTTCTGCCAGTCACCGGCATACGTCCTGGGACAAAGACTT
                120 --||-|||----||||----||--||--|-|--||--|||.||-|||----||||-|---
transcrip        74 AGCG-CAT----GTGGGACAGT--TG--A-T--CC--CATCCG-CCT----CAAA-A---

chromosom       171 TTTACT-ACAATGCCAGGCGGGAGAGTCACCCGCCGCGGTGTCGACCCAGGGG-ACAGCG
                180 |||-||-.||||------------|-|---------||||-|-------||||-||||--
transcrip       111 TTT-CTCGCAAT------------A-T---------CGGT-T-------GGGGCACAG--

chromosom       229 GGAAGATGTCGTGGTTTC-CTT---G---TCATTAACC-------A-ACTCCATCTTA--
                240 -------|||-------|-|||---|---||||--|||-------|-|--||-|-|||--
transcrip       138 -------GTC-------CACTTTACGAATTCAT--ACCGTGGTAGAGA--CC-T-TTATT

chromosom       272 AAAGCTCCTCTAGCCATGGCATG---GT---ACGTTGCGCGCACCCTTTTA-T----CG-
                300 |.|-------|||--||---|||---||---|--|||||-|||------||-|----||-
transcrip       178 AGA-------TAG--AT---ATGACTGTTTGA--TTGCG-GCA------TAGTACGACGA

chromosom       320 -GTAAGG-------CG---CGGT-------GACTCTC--------TCCCAAAACAGTGCC
                360 -|.||||-------||---||||-------|||---|--------||..||||||-----
transcrip       217 AGCAAGGGGATGGACGTTTCGGTTGCATTCGAC---CGGGTTGGGTCGAAAAACA-----

chromosom       354 ATAATGGTTCGCTTCCTACCT-------AAG-GCACTT-ACGGCCAATTAATGCGCAAGC
                420 -----|||----||--||--|-------|||-|||-||-||.|----|||------||||
transcrip       269 -----GGT----TT--TA--TGAAAAGAAAGTGCA-TTAACTG----TTA------AAGC

chromosom       405 GAGCGGAAGGTC-TAACAG-GGCACCGAATTC 435
                480 ---|-----|||-||.|.|-||----|--||| 512
transcrip       305 ---C-----GTCATATCGGTGG----G--TTC 323
""",
        )
        alignments2 = aligner.align(transcript, sequence)
        alignment2 = alignments2[0]
        self.assertEqual(alignment2.coordinates.shape[1], 12)
        self.assertEqual(
            str(alignment2),
            """\
transcrip        28 TCCAAGGGGCCCCAGCACAGCACATTTTTAACGCGAACTAAGCGGGAGCGCATGTGGGAC
                  0 |||||||||||||||||||||||||||||||||||--------------------|||||
sequence          0 TCCAAGGGGCCCCAGCACAGCACATTTTTAACGCG--------------------GGGAC

transcrip        88 AGTTGATCCCATCCGCCTCAAAATTTCTCGCAATATCGGTTGGGGCACAGGTCCACTTTA
                 60 ||||||||||||||||||--------------------------------------||||
sequence         40 AGTTGATCCCATCCGCCT--------------------------------------TTTA

transcrip       148 CGAATTCATACCGTGGTAGAGACCTTTATTAGATAGATATGACTGTTTGATTGCGGCATA
                120 |||||||||||||||||||---------------------------------||||||||
sequence         62 CGAATTCATACCGTGGTAG---------------------------------GCGGCATA

transcrip       208 GTACGACGAAGCAAGGGGATGGACGTTTCGGTTGCATTCGACCGGGTTGGGTCGAAAAAC
                180 ||||||||||||--------------------------------||||||||||||||||
sequence         89 GTACGACGAAGC--------------------------------GGTTGGGTCGAAAAAC

transcrip       268 AGGTTTTATGAAAAGAAAGTGCATTAACTGTTAAAGCCGTCATATCGGTGGGTTC 323
                240 |||||------------------------------|||||||||||||||||||| 295
sequence        117 AGGTT------------------------------GCCGTCATATCGGTGGGTTC 142
""",
        )
        alignment = alignment1.map(alignment2)
        self.assertEqual(alignment.coordinates.shape[1], 76)
        self.assertEqual(
            str(alignment),
            """\
chromosom        35 TCATAATTAAAGGGAGTG---CAGCAACGGCCTGCTCTCCAAAAAAACAGGTTTTATGAA
                  0 ||------.|||||---|---|||||-----|.||-----------|||--||||-|---
sequence          0 TC------CAAGGG---GCCCCAGCA-----CAGC-----------ACA--TTTT-T---

chromosom        92 AAGAAAGTGCATTAACTGTTAAAGCCGTCATATCGGTGG----GTTCTGCCAGTCACCGG
                 60 ---||.|.|----------------------------||----||--||--|-|--||--
sequence         29 ---AACGCG----------------------------GGGACAGT--TG--A-T--CC--

chromosom       148 CATACGTCCTGGGACAAAGACTTTTTACTACAATGCCAGGCGGGAGAGTCACCCGCCGCG
                120 |||.||-|||--------------------------------------------------
sequence         49 CATCCG-CCT--------------------------------------------------

chromosom       208 GTGTCGACCCAGGGGACAGCGGGAAGATGTCGTGGTTTCCTT---G---TCATTAACCAA
                180 ----------------------------------------||---|---||||--|||--
sequence         58 ----------------------------------------TTTACGAATTCAT--ACC--

chromosom       262 CTCCATCTTAAAAGCTCCTCTAGCCATGGCATGGTACGTT-------GCGCGCACCCTTT
                240 -----------------------------------------------|||-|||------
sequence         74 ----------------------------------------GTGGTAGGCG-GCA------

chromosom       315 TA-T----CG--GTAAGGCGCGGTGACTCTC-------TCCCAAAACAGTGCCATAATGG
                300 ||-|----||--|.------------------------||..||||||----------||
sequence         87 TAGTACGACGAAGC-----------------GGTTGGGTCGAAAAACA----------GG

chromosom       361 TTCGCTTCCTACCTAAGGCACTTACGGCCAATTAATGCGCAAGCGAGCGGAAGGTC-TAA
                360 |----|------------------------------------||---|-----|||-||.
sequence        120 T----T------------------------------------GC---C-----GTCATAT

chromosom       420 CAG-GGCACCGAATTC 435
                420 |.|-||----|--||| 436
sequence        132 CGGTGG----G--TTC 142
""",
        )
        line = format(alignment, "psl")
        self.assertEqual(
            line,
            """\
96	10	0	0	11	36	27	294	+	sequence	142	0	142	chromosome	440	35	435	37	2,6,1,5,4,3,4,1,6,2,2,2,1,1,2,6,3,2,1,4,3,3,3,2,1,2,2,10,3,1,2,1,3,6,2,1,3,	0,2,8,12,17,21,24,28,29,35,41,43,45,46,47,49,55,58,63,67,71,81,84,87,90,95,99,108,118,121,122,124,125,129,136,138,139,	35,43,52,53,63,78,83,88,95,129,131,135,139,141,144,148,155,248,250,251,257,302,306,315,317,318,320,339,359,366,403,408,414,417,423,429,432,
""",
        )

    def test2(self):
        aligner = self.aligner
        chromosome = Seq(
            "CTAATGCGCCTTGGTTTTGGCTTAACTAGAAGCAACCTGTAAGATTGCCAATTCTTCAGTCGAAGTAAATCTTCAATGTTTTGGACTCTTAGCGGATATGCGGCTGAGAAGTACGACATGTGTACATTCATACCTGCGTGACGGTCAGCCTCCCCCGGGACCTCATTGGGCGAATCTAGGTGTGATAATTGACACACTCTTGGTAAGAAGCACTCTTTACCCGATCTCCAAGTACCGACGCCAAGGCCAAGCTCTGCGATCTAAAGCTGCCGATCGTAGATCCAAGTCCTCAGCAAGCTCGCACGAATACGCAGTTCGAAGGCTGGGTGTTGTACGACGGTACGGTTGCTATAGCACTTTCGCGGTCTCGCTATTTTCAGTTTGACTCACCAGTCAGTATTGTCATCGACCAACTTGGAATAGTGTAACGCAGCGCTTGA"
        )
        chromosome.id = "chromosome"
        transcript = Seq(
            "CACCGGCGTCGGTACCAGAGGGCGTGAGTACCTTGTACTAGTACTCATTGGAATAATGCTCTTAGAAGTCATCTAAAAGTGACAACGCCTGTTTGGTTATGACGTTCACGACGCGTCTTAACAGACTAGCATTAGACCGACGGGTTGAGGCGTCTGGGTTGATACAGCCGTTTGCATCAGTGTATCTAACACTCTGAGGGATAATTGATGAACCGTGTTTTCCGATAGGTATGTACAGTACCACCACGCACGACTAAGGACCATTTTCTGCGTGCGACGGTTAAAATAACCTCAATCACT"
        )
        transcript.id = "transcript"
        sequence = Seq(
            "TCCCCTTCTAATGGAATCCCCCTCCGAAGGTCGCAGAAGCGGCCACGCCGGAGATACCAGTTCCACGCCTCAGGTTGGACTTGTCACACTTGTACGCGAT"
        )
        sequence.id = "sequence"
        alignments1 = aligner.align(chromosome, transcript)
        alignment1 = alignments1[0]
        self.assertEqual(alignment1.coordinates.shape[1], 126)
        self.assertEqual(
            str(alignment1),
            """\
chromosom         5 GCGCCTTGGTTTTGGCTTAACTAGA-------AGCAACC-TGTAAGATTGCCAATTCTTC
                  0 |||--|.|||---------||.|||-------||-.|||-||||------------||--
transcrip         5 GCG--TCGGT---------ACCAGAGGGCGTGAG-TACCTTGTA------------CT--

chromosom        57 AGTCGAAGTAAATCTTCAATGTTTTGGA------CTCTTAG----CGGATATGCGGCTGA
                 60 ------||||---|-|||-----|||||------|||||||----|----||----||-|
transcrip        39 ------AGTA---C-TCA-----TTGGAATAATGCTCTTAGAAGTC----AT----CT-A

chromosom       107 GAAGTACGACA-----TGT---GT----ACATTCATAC--CTGCGT-------GACGGTC
                120 .||||--||||-----|||---||----||.|||--||--|-||||-------|||--|-
transcrip        75 AAAGT--GACAACGCCTGTTTGGTTATGACGTTC--ACGAC-GCGTCTTAACAGAC--T-

chromosom       146 AGCCT----CCCCCGGGACCTCATTG-GGCGAATCTAGGTGTGATA-A-----TTGACA-
                180 |||.|----||..||||------|||-||||--|||.|||-|||||-|-----|||-||-
transcrip       127 AGCATTAGACCGACGGG------TTGAGGCG--TCTGGGT-TGATACAGCCGTTTG-CAT

chromosom       194 CA----CTCTTGGTAAGAAGCACTCT---------TTACCCGATCTCCAAGTACCGACGC
                240 ||----.|||-------||-||||||---------||----|||------|.||||----
transcrip       177 CAGTGTATCT-------AA-CACTCTGAGGGATAATT----GAT------GAACCG----

chromosom       241 CAAGGCCAAGCTCTG-----CGATCTAAAGCTGCCGATCGTAGATCCAAGTCCTCAGCAA
                300 -------------||-----||||----||.|----||-|||----|-|||.|-||.||-
transcrip       215 -------------TGTTTTCCGAT----AGGT----AT-GTA----C-AGTAC-CACCA-

chromosom       296 GCTCGCACGAATACGCAG-------TTCGAAGGCTGGGTGTTGTACGACGGTACGGTTGC
                360 ---|||||||.||---||-------||------|||.|||-----|||||||--------
transcrip       246 ---CGCACGACTA---AGGACCATTTT------CTGCGTG-----CGACGGT--------

chromosom       349 TATAGCACTTTCGCGGTCTCGCTATTTTCAGTTTGACTCACCAGTCAGTATTGTCATCGA
                420 ||-|--|----------------||----------|---|||--|||--------|||--
transcrip       281 TA-A--A----------------AT----------A---ACC--TCA--------ATC--

chromosom       409 CCAACT 415
                480 ---||| 486
transcrip       297 ---ACT 300
""",
        )
        alignments2 = aligner.align(transcript, sequence)
        alignment2 = alignments2[0]
        self.assertEqual(alignment2.coordinates.shape[1], 66)
        self.assertEqual(
            str(alignment2),
            """\
transcrip         8 TCGGTACCAGAGGGCGTGAGTACCTTGTACTAGTACTCATTGGAATAATGCTCTTAGAAG
                  0 ||------------|-------||||---|||------|-||||||--------------
sequence          0 TC------------C-------CCTT---CTA------A-TGGAAT--------------

transcrip        68 TCATCTAAAAGTGACAACGCCTGTTTGGTTATGACGTTCACGACGCGTCTTAACAGACTA
                 60 -----------------|.||-------------|--||-|||.|-|||---.||||--|
sequence         17 -----------------CCCC-------------C--TC-CGAAG-GTC---GCAGA--A

transcrip       128 GCATTAGACCGACG--GGTTGAGGCGTCTGGGTTGATACAGCCGTTTGCATCAGTGTATC
                120 ||----|.||-|||--|---||------------|||||------------||||---||
sequence         38 GC----GGCC-ACGCCG---GA------------GATAC------------CAGT---TC

transcrip       186 TAACA---CTCTGAGGGATAATTGATGAACCGTGTTTTCCGATAGGTATGTACAGTACCA
                180 ---||---|||--|||-----|||--||--|-----||----------------||----
sequence         63 ---CACGCCTC--AGG-----TTG--GA--C-----TT----------------GT----

transcrip       243 CCACGCACGACTAAGGACCATTTTCTG--CGTGCGA 277
                240 -----|||-|||-------------||--|--|||| 276
sequence         84 -----CAC-ACT-------------TGTAC--GCGA  99
""",
        )
        alignment = alignment1.map(alignment2)
        self.assertEqual(alignment.coordinates.shape[1], 78)
        self.assertEqual(
            str(alignment),
            """\
chromosom        10 TTGGTTTTGGCTTAACTAGAAGCAA-CC-TGTAAGATTGCCAATTCTTCAGTCGAAGTAA
                  0 |.------------------------||-|---------------||--------|----
sequence          0 TC-----------------------CCCTT---------------CT--------A----

chromosom        68 ATCTTCAATGTTTTGGACTCTTAGCGGATATGCGGCTGAGAAGTACGACATGTGTA----
                 60 ------|------||||-------------------------------------------
sequence         10 ------A------TGGA---------------------------------------ATCC

chromosom       124 --CATTCATAC--CTGCGT----GACGGTCAGCCT--CCCCCG--GGACCTCATTGGGCG
                120 --|--||---|--.-|-||----||-----|||----||-.||--|---------|----
sequence         19 CCC--TC---CGAA-G-GTCGCAGA-----AGC--GGCC-ACGCCG---------G----

chromosom       172 AATCTAGGTGT-GATAATTGACA-CAC--TCTTGGTAAGAAGCA---CTCT---TTACCC
                180 ------------||||--------||---||-----------||---|||----||----
sequence         51 -----------AGATA-------CCA-GTTC-----------CACGCCTC-AGGTT----

chromosom       222 GATCTCCAAGTACCGACGCCAAGGCCAAGCTCTGCGATCTAAAGCTGCCGATCGTAGATC
                240 |--------|.--|----------------------------------------------
sequence         76 G--------GA--C----------------------------------------------

chromosom       282 CAA--GTCCTCAGCAAGCTCGCACGAATACGCAGTTCGAAGGCTG--GGTGTTGTACGA
                300 -----||--------------|||-|.|---------------||--.--|-----|||
sequence         80 ---TTGT--------------CAC-ACT---------------TGTAC--G-----CGA

chromosom       337
                359
sequence         99
""",
        )
        line = format(alignment, "psl")
        self.assertEqual(
            line,
            """\
61	6	0	0	14	32	28	260	+	sequence	100	0	99	chromosome	440	10	337	35	2,2,1,2,1,1,4,1,2,1,1,1,2,2,3,2,3,1,1,4,2,2,2,3,2,1,2,1,2,3,3,2,1,1,3,	0,3,6,7,9,10,11,21,22,24,27,28,29,35,37,42,44,49,50,52,57,61,63,68,74,76,77,79,82,84,87,90,94,95,96,	10,35,37,53,63,74,81,124,127,132,133,135,137,139,146,151,154,157,167,183,194,197,210,212,216,222,231,235,285,301,305,323,325,328,334,
""",
        )


def map_check(alignment1, alignment2):
    line1 = format(alignment1, "psl")
    handle = open("transcript.psl", "w")
    handle.write(line1)
    handle.close()
    line2 = format(alignment2, "psl")
    handle = open("sequence.psl", "w")
    handle.write(line2)
    handle.close()
    stdout = os.popen("pslMap sequence.psl transcript.psl stdout")
    line = stdout.read()
    os.remove("transcript.psl")
    os.remove("sequence.psl")
    return line


def test_random(aligner, nBlocks1=1, nBlocks2=1, strand1="+", strand2="+"):
    chromosome = "".join(["ACGT"[random.randint(0, 3)] for i in range(1000)])
    nBlocks = nBlocks1
    transcript = ""
    position = 0
    for i in range(nBlocks):
        position += random.randint(60, 80)
        blockSize = random.randint(60, 80)
        transcript += chromosome[position : position + blockSize]
        position += blockSize
    nBlocks = nBlocks2
    sequence = ""
    position = 0
    for i in range(nBlocks):
        position += random.randint(20, 40)
        blockSize = random.randint(20, 40)
        sequence += transcript[position : position + blockSize]
        position += blockSize
    chromosome = Seq(chromosome)
    transcript = Seq(transcript)
    sequence = Seq(sequence)
    if strand1 == "-":
        chromosome = chromosome.reverse_complement()
    if strand2 == "-":
        sequence = sequence.reverse_complement()
    chromosome.id = "chromosome"
    transcript.id = "transcript"
    sequence.id = "sequence"
    alignments1 = aligner.align(chromosome, transcript, strand=strand1)
    alignment1 = alignments1[0]
    alignments2 = aligner.align(transcript, sequence, strand=strand2)
    alignment2 = alignments2[0]
    alignment = alignment1.map(alignment2)
    line_check = map_check(alignment1, alignment2)
    line = format(alignment, "psl")
    assert line == line_check
    print("Randomized test %d, %d, %s, %s OK" % (nBlocks1, nBlocks2, strand1, strand2))


def test_random_sequences(aligner, strand1="+", strand2="+"):
    chromosome = "".join(["ACGT"[random.randint(0, 3)] for i in range(1000)])
    transcript = "".join(["ACGT"[random.randint(0, 3)] for i in range(300)])
    sequence = "".join(["ACGT"[random.randint(0, 3)] for i in range(100)])
    chromosome = Seq(chromosome)
    transcript = Seq(transcript)
    sequence = Seq(sequence)
    chromosome.id = "chromosome"
    transcript.id = "transcript"
    sequence.id = "sequence"
    alignments = aligner.align(chromosome, transcript, strand=strand1)
    alignment1 = alignments[0]
    alignments = aligner.align(transcript, sequence, strand=strand2)
    alignment2 = alignments[0]
    line_check = map_check(alignment1, alignment2)
    alignment = alignment1.map(alignment2)
    line_check = line_check.split()
    line = format(alignment, "psl")
    line = line.split()
    assert line[8:] == line_check[8:]
    line1 = format(alignment1, "psl")
    words = line1.split()
    nBlocks1 = int(words[17])
    line2 = format(alignment2, "psl")
    words = line2.split()
    nBlocks2 = int(words[17])
    print(
        "Randomized sequence test %d, %d, %s, %s OK"
        % (nBlocks1, nBlocks2, strand1, strand2)
    )


def perform_randomized_tests(n=1000):
    """Perform randomized tests and compare to pslMap.

    Run this function to perform 8 x n mappings for alignments of randomly
    generated sequences, get the alignment in PSL format, and compare the
    result to that of pslMap.
    """
    aligner = PairwiseAligner()
    aligner.internal_open_gap_score = -1
    aligner.internal_extend_gap_score = -0.0
    aligner.match_score = +1
    aligner.mismatch_score = -1
    aligner.mode = "local"
    for i in range(n):
        nBlocks1 = random.randint(1, 10)
        nBlocks2 = random.randint(1, 10)
        test_random(aligner, nBlocks1, nBlocks2, "+", "+")
        test_random(aligner, nBlocks1, nBlocks2, "+", "-")
        test_random(aligner, nBlocks1, nBlocks2, "-", "+")
        test_random(aligner, nBlocks1, nBlocks2, "-", "-")
        test_random_sequences(aligner, "+", "+")
        test_random_sequences(aligner, "+", "-")
        test_random_sequences(aligner, "-", "+")
        test_random_sequences(aligner, "-", "-")


class TestZeroGaps(unittest.TestCase):
    def test1(self):
        coordinates = np.array([[0, 3, 6, 9], [0, 3, 6, 9]])
        sequences = [
            SeqRecord(Seq(None, 9), id="genome"),
            SeqRecord(Seq(None, 9), id="mRNA"),
        ]
        alignment1 = Alignment(sequences, coordinates)
        coordinates = np.array([[0, 9], [0, 9]])
        sequences = [
            SeqRecord(Seq(None, 9), id="mRNA"),
            SeqRecord(Seq(None, 9), id="tag"),
        ]
        alignment2 = Alignment(sequences, coordinates)
        alignment = alignment1.map(alignment2)
        self.assertTrue(
            np.array_equal(
                alignment.coordinates, np.array([[0, 3, 6, 9], [0, 3, 6, 9]])
            )
        )

    def test2(self):
        coordinates = np.array([[0, 69], [0, 69]])
        sequences = [
            SeqRecord(Seq(None, 69), id="genome"),
            SeqRecord(Seq(None, 69), id="mRNA"),
        ]
        alignment1 = Alignment(sequences, coordinates)
        coordinates = np.array([[0, 24, 24, 69], [0, 24, 24, 69]])
        sequences = [
            SeqRecord(Seq(None, 69), id="mRNA"),
            SeqRecord(Seq(None, 68), id="tag"),
        ]
        alignment2 = Alignment(sequences, coordinates)
        alignment = alignment1.map(alignment2)
        # fmt: off
        self.assertTrue(
            np.array_equal(alignment.coordinates,
                           np.array([[0, 24, 69],   # noqa: E128
                                     [0, 24, 69]]))
        )
        # fmt: on

    def test3(self):
        coordinates = np.array([[0, 69], [0, 69]])
        sequences = [
            SeqRecord(Seq(None, 69), id="genome"),
            SeqRecord(Seq(None, 69), id="mRNA"),
        ]
        alignment1 = Alignment(sequences, coordinates)
        coordinates = np.array([[0, 24, 24, 69], [0, 24, 23, 68]])
        sequences = [
            SeqRecord(Seq(None, 69), id="mRNA"),
            SeqRecord(Seq(None, 68), id="tag"),
        ]
        alignment2 = Alignment(sequences, coordinates)
        alignment = alignment1.map(alignment2)
        # fmt: off
        self.assertTrue(
            np.array_equal(alignment.coordinates,
                           np.array([[0, 24, 24, 69],  # noqa: E128
                                     [0, 24, 23, 68]]))
        )
        # fmt: on

    def test4(self):
        coordinates = np.array([[0, 210], [0, 210]])
        sequences = [
            SeqRecord(Seq(None, 210), id="genome"),
            SeqRecord(Seq(None, 210), id="mRNA"),
        ]
        alignment1 = Alignment(sequences, coordinates)
        coordinates = np.array([[0, 18, 18, 102, 102, 210], [0, 18, 17, 101, 100, 208]])
        sequences = [
            SeqRecord(Seq(None, 210), id="mRNA"),
            SeqRecord(Seq(None, 208), id="tag"),
        ]
        alignment2 = Alignment(sequences, coordinates)
        alignment = alignment1.map(alignment2)
        # fmt: off
        self.assertTrue(
            np.array_equal(alignment.coordinates,
                           np.array([[0, 18, 18, 102, 102, 210],
                                     [0, 18, 17, 101, 100, 208]]))
        )
        # fmt: on

    def test5(self):
        coordinates = np.array([[0, 210], [0, 210]])
        sequences = [
            SeqRecord(Seq(None, 210), id="genome"),
            SeqRecord(Seq(None, 210), id="mRNA"),
        ]
        alignment1 = Alignment(sequences, coordinates)
        coordinates = np.array([[0, 51, 51, 210], [0, 51, 49, 208]])
        sequences = [
            SeqRecord(Seq(None, 210), id="mRNA"),
            SeqRecord(Seq(None, 208), id="tag"),
        ]
        alignment2 = Alignment(sequences, coordinates)
        alignment = alignment1.map(alignment2)
        # fmt: off
        self.assertTrue(
            np.array_equal(alignment.coordinates,
                           np.array([[0, 51, 51, 210],
                                     [0, 51, 49, 208]]))
        )
        # fmt: on


class TestLiftOver(unittest.TestCase):
    def test_chimp(self):
        chain = Align.read("Blat/panTro5ToPanTro6.over.chain", "chain")
        alignment = Align.read("Blat/est.panTro5.psl", "psl")
        self.assertEqual(chain.target.id, alignment.target.id)
        self.assertEqual(len(chain.target.seq), len(alignment.target.seq))
        chain = chain[::-1]
        record = SeqIO.read("Blat/est.fa", "fasta")
        self.assertEqual(record.id, alignment.query.id)
        self.assertEqual(len(record.seq), len(alignment.query.seq))
        alignment.query = record.seq
        record = SeqIO.read("Blat/panTro5.fa", "fasta")
        chromosome, start_end = record.id.split(":")
        start, end = start_end.split("-")
        start = int(start)
        end = int(end)
        data = {start: str(record.seq)}
        length = len(alignment.target.seq)
        seq = Seq(data, length=length)
        record = SeqRecord(seq, id="chr1")
        alignment.target = record
        text = """^\
chr1      122835789 AGAGATTATTTTGCAGAGGATGATGGGGAGATGGTACCCAGAACGAGTCACACAGCAGGT
                  0 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::--
query            32 AGAGATTATTTTGCAGAGGATGATGGGGAGATGGTACCCAGAACGAGTCACACAGCAG--

chr1      122835849 AAGGATGCTGTGGGCCTTGCCTTGTTAAATTCTTTGtttcttttgtttattcatttggtt
                 60 ------------------------------------------------------------
query            90 ------------------------------------------------------------
((.|\n)*)
chr1      122840889 TTGTATTTTGCAGAAACTGAATTCTGCTGGAATGTGCCAGTTAGAATGATCCTAGTGCTG
               5100 ------------------------------------------------------------
query            90 ------------------------------------------------------------

chr1      122840949 TTATTATATAAACCTTTTTTGTTGTTGTTCTGTTTCATTGACAGCTTTTCTTAGTGACAC
               5160 --------------------------------------------::::::::::::::::
query            90 --------------------------------------------CTTTTCTTAGTGACAC

chr1      122841009 TAAAGATCGAGGCCCTCCAGTGCAGTCACAGATCTGGAGAAGTGGTGAAAAGGTCCCGTT
               5220 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::X:
query           106 TAAAGATCGAGGCCCTCCAGTGCAGTCACAGATCTGGAGAAGTGGTGAAAAGGTCCCGNT

chr1      122841069 TGTGCAGACATATTCCTTGAGAGCATTTGAGAAACCCCCTCAGGTACAGACCCAGGCTCT
               5280 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
query           166 TGTGCAGACATATTCCTTGAGAGCATTTGAGAAACCCCCTCAGGTACAGACCCAGGCTCT

chr1      122841129 TCGAGACTTTGAGAAGGTAAGTCATGTGAGTGGATAATTGTTATCCCAATTAGAAGCAGT
               5340 ::::::::::::::::--------------------------------------------
query           226 TCGAGACTTTGAGAAG--------------------------------------------

chr1      122841189 ACTATGGAATAGTGATGCCTGATAAAAATATGACCCATGGATTGGTCCGGATTATGGATG
               5400 ------------------------------------------------------------
query           242 ------------------------------------------------------------
((.|\n)*)
chr1      122907129 GTTCTTGGGTTGAGGGGGCAATCGGGCACGCTCCTCCCCATGGGTTGCCCATCATGTCTA
              71340 ------------------------------------------------------------
query           242 ------------------------------------------------------------

chr1      122907189 ATGGATATCGCACTCTGTCCCAGCACCTCAATGACCTGAAGAAGGAGAACTTCAGCCTCA
              71400 -----------------------:::::::::::::::::::::::::::::::::::::
query           242 -----------------------CACCTCAATGACCTGAAGAAGGAGAACTTCAGCCTCA

chr1      122907249 AGCTGCGCATCTACTTCCTGGAGGAGCGCATGCAACAGAAGTATGAGGCCAGCCGGGAGG
              71460 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
query           279 AGCTGCGCATCTACTTCCTGGAGGAGCGCATGCAACAGAAGTATGAGGCCAGCCGGGAGG

chr1      122907309 ACATC 122907314
              71520 :::::     71525
query           339 ACATC       344
$"""
        self.assertRegex(str(alignment).replace("|", ":").replace(".", "X"), text)
        lifted_alignment = chain.map(alignment)
        # fmt: off
        self.assertTrue(
            np.array_equal(lifted_alignment.coordinates,
                           np.array([[111982717, 111982775, 111987921,
                                      111988073, 112009200, 112009302],
                                     [       32,        90,        90,
                                            242,       242,       344]])
                          )
        )
        # fmt: on
        record = SeqIO.read("Blat/panTro6.fa", "fasta")
        chromosome, start_end = record.id.split(":")
        start, end = start_end.split("-")
        start = int(start)
        end = int(end)
        data = {start: str(record.seq)}
        length = len(alignment.target.seq)
        seq = Seq(data, length=length)
        record = SeqRecord(seq, id="chr1")
        lifted_alignment.target = record
        text = """^\
chr1      111982717 AGAGATTATTTTGCAGAGGATGATGGGGAGATGGTACCCAGAACGAGTCACACAGCAGGT
                  0 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::--
query            32 AGAGATTATTTTGCAGAGGATGATGGGGAGATGGTACCCAGAACGAGTCACACAGCAG--

chr1      111982777 AAGGATGCTGTGGGCCTTGCCTTGTTAAATTCTTTGtttcttttgtttattcatttggtt
                 60 ------------------------------------------------------------
query            90 ------------------------------------------------------------
((.|\n)*)
chr1      111987817 TTGTATTTTGCAGAAACTGAATTCTGCTGGAATGTGCCAGTTAGAATGATCCTAGTGCTG
               5100 ------------------------------------------------------------
query            90 ------------------------------------------------------------

chr1      111987877 TTATTATATAAACCTTTTTTGTTGTTGTTCTGTTTCATTGACAGCTTTTCTTAGTGACAC
               5160 --------------------------------------------::::::::::::::::
query            90 --------------------------------------------CTTTTCTTAGTGACAC

chr1      111987937 TAAAGATCGAGGCCCTCCAGTGCAGTCACAGATCTGGAGAAGTGGTGAAAAGGTCCCGTT
               5220 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::X:
query           106 TAAAGATCGAGGCCCTCCAGTGCAGTCACAGATCTGGAGAAGTGGTGAAAAGGTCCCGNT

chr1      111987997 TGTGCAGACATATTCCTTGAGAGCATTTGAGAAACCCCCTCAGGTACAGACCCAGGCTCT
               5280 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
query           166 TGTGCAGACATATTCCTTGAGAGCATTTGAGAAACCCCCTCAGGTACAGACCCAGGCTCT

chr1      111988057 TCGAGACTTTGAGAAGGTAAGTCATGTGAGTGGATAATTGTTATCCCAATTAGAAGCAGT
               5340 ::::::::::::::::--------------------------------------------
query           226 TCGAGACTTTGAGAAG--------------------------------------------

chr1      111988117 ACTATGGAATAGTGATGCCTGATAAAAATATGACCCATGGATTGGTCCGGATTATGGATG
               5400 ------------------------------------------------------------
query           242 ------------------------------------------------------------
((.|\n)*)
chr1      112009117 GTTCTTGGGTTGAGGGGGCAATCGGGCACGCTCCTCCCCATGGGTTGCCCATCATGTCTA
              26400 ------------------------------------------------------------
query           242 ------------------------------------------------------------

chr1      112009177 ATGGATATCGCACTCTGTCCCAGCACCTCAATGACCTGAAGAAGGAGAACTTCAGCCTCA
              26460 -----------------------:::::::::::::::::::::::::::::::::::::
query           242 -----------------------CACCTCAATGACCTGAAGAAGGAGAACTTCAGCCTCA

chr1      112009237 AGCTGCGCATCTACTTCCTGGAGGAGCGCATGCAACAGAAGTATGAGGCCAGCCGGGAGG
              26520 ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
query           279 AGCTGCGCATCTACTTCCTGGAGGAGCGCATGCAACAGAAGTATGAGGCCAGCCGGGAGG

chr1      112009297 ACATC 112009302
              26580 :::::     26585
query           339 ACATC       344
$"""
        self.assertRegex(
            str(lifted_alignment).replace("|", ":").replace(".", "X"), text
        )


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)
