#!/usr/bin/env python
"""Unit tests for unigene-specific classes
"""
from unittest import TestCase

from cogent3.parse.record_finder import GbFinder
from cogent3.parse.unigene import (
    LinesToUniGene,
    UniGeneProtSimRecord,
    UniGeneSeqRecord,
    _read_expression,
    _read_seq,
    _read_sts,
)


class unigeneTests(TestCase):
    """Tests toplevel functions."""

    def _assert_records_equal(self, observed, expected):
        """Assert two record sequences agree in length and item by item.

        The explicit length check is the important part: ``zip`` stops at the
        shorter of its inputs, so a bare pairwise loop would pass vacuously
        if ``observed`` were missing records (or were empty altogether).
        Comparing item by item afterwards keeps failure messages focused on
        the first record that differs.
        """
        self.assertEqual(len(observed), len(expected))
        for obs, exp in zip(observed, expected):
            self.assertEqual(obs, exp)

    def test_read_sts(self):
        """_read_sts should perform correct conversions"""
        # Space-separated KEY=VALUE pairs are expected back as a plain dict
        # with string values.
        self.assertEqual(
            _read_sts("ACC=RH128467 UNISTS=211775\n"),
            {"ACC": "RH128467", "UNISTS": "211775"},
        )

    def test_read_expression(self):
        """_read_expression should perform correct conversions"""
        # Items are separated by ' ; ' and may themselves contain spaces.
        self.assertEqual(
            _read_expression("embryo ; whole body ; mammary gland ; brain\n"),
            ["embryo", "whole body", "mammary gland", "brain"],
        )

    def test_read_seq(self):
        """_read_seq should perform correct conversions"""
        # reset the found fields, since we can't guarantee order of test
        # execution and it's persistent class data
        UniGeneSeqRecord.found_fields = {}

        # a line holding a single KEY=VALUE pair
        self.assertEqual(
            _read_seq("ACC=BC025044.1\n"), UniGeneSeqRecord({"ACC": "BC025044.1"})
        )

        # a line holding several '; '-separated KEY=VALUE pairs
        multi_field_line = "ACC=AI842963.1; NID=g5477176; CLONE=UI-M-AO1-aem-f-10-0-UI; END=3'; LID=1944; SEQTYPE=EST; TRACE=158501677\n"
        self.assertEqual(
            _read_seq(multi_field_line),
            UniGeneSeqRecord(
                {
                    "ACC": "AI842963.1",
                    "NID": "g5477176",
                    "CLONE": "UI-M-AO1-aem-f-10-0-UI",
                    "END": "3'",
                    "LID": "1944",
                    "SEQTYPE": "EST",
                    "TRACE": "158501677",
                }
            ),
        )

    def test_LinesToUniGene(self):
        """LinesToUniGene should give expected results on sample data"""
        # Two '//'-terminated records. The first has several STS, PROTSIM and
        # SEQUENCE lines; the second has no STS line at all and a single
        # PROTSIM / SEQUENCE line.
        fake_file = """ID          Mm.1
TITLE       S100 calcium binder
GENE        S100a10
CYTOBAND    3 41.7 cM
LOCUSLINK   20194
EXPRESS     embryo ; whole body ; mammary gland ; brain
CHROMOSOME  3
STS         ACC=RH128467 UNISTS=211775
STS         ACC=M16465 UNISTS= 178878
PROTSIM     ORG=Homo sapiens; PROTGI=107251; PROTID=pir:JC1139; PCT=91; ALN=97
PROTSIM     ORG=Mus musculus; PROTGI=116487; PROTID=sp:P08207; PCT=100; ALN=97
PROTSIM     ORG=Rattus norvegicus; PROTGI=116489; PROTID=sp:P05943; PCT=94; ALN=94
SCOUNT      5
SEQUENCE    ACC=BC025044.1; NID=g19263549; PID=g19263550; SEQTYPE=mRNA
SEQUENCE    ACC=AA471893.1; NID=g2199884; CLONE=IMAGE:872193; END=5'; LID=539; SEQTYPE=EST
SEQUENCE    ACC=AI842963.1; NID=g5477176; CLONE=UI-M-AO1-aem-f-10-0-UI; END=3'; LID=1944; SEQTYPE=EST; TRACE=158501677
SEQUENCE    ACC=CB595147.1; NID=g29513003; CLONE=IMAGE:30300703; END=5'; LID=12885; MGC=6677832; SEQTYPE=EST
SEQUENCE    ACC=BY144053.1; NID=g26280109; CLONE=L930184D22; END=5'; LID=12267; SEQTYPE=EST
//
ID          Mm.5
TITLE       homeo box A10
GENE        Hoxa10
CYTOBAND    6 26.33 cM
LOCUSLINK   15395
EXPRESS     kidney ; colon ; mammary gland
CHROMOSOME  6
PROTSIM     ORG=Caenorhabditis elegans; PROTGI=7510074; PROTID=pir:T31611; PCT=30; ALN=326
SCOUNT      1
SEQUENCE    ACC=AW990320.1; NID=g8185938; CLONE=IMAGE:1513482; END=5'; LID=1043; SEQTYPE=EST; TRACE=94472873
//
"""
        records = list(GbFinder(fake_file.split("\n")))
        self.assertEqual(len(records), 2)
        first, second = [LinesToUniGene(record) for record in records]

        # ---- first record: scalar fields ----
        self.assertEqual(first.ID, "Mm.1")
        self.assertEqual(first.TITLE, "S100 calcium binder")
        self.assertEqual(first.GENE, "S100a10")
        self.assertEqual(first.CYTOBAND, "3 41.7 cM")
        self.assertEqual(first.CHROMOSOME, "3")
        # LOCUSLINK is expected as an int, unlike CHROMOSOME which stays a str
        self.assertEqual(first.LOCUSLINK, 20194)
        self.assertEqual(
            first.EXPRESS, ["embryo", "whole body", "mammary gland", "brain"]
        )
        # the second STS line has a stray space after 'UNISTS=' in the input;
        # the expected value carries no leading whitespace
        self.assertEqual(
            first.STS,
            [
                {"ACC": "RH128467", "UNISTS": "211775"},
                {"ACC": "M16465", "UNISTS": "178878"},
            ],
        )

        # ---- first record: repeated PROTSIM lines ----
        exp_prot_sim = [
            UniGeneProtSimRecord(fields)
            for fields in [
                {
                    "ORG": "Homo sapiens",
                    "PROTGI": "107251",
                    "PROTID": "pir:JC1139",
                    "PCT": "91",
                    "ALN": "97",
                },
                {
                    "ORG": "Mus musculus",
                    "PROTGI": "116487",
                    "PROTID": "sp:P08207",
                    "PCT": "100",
                    "ALN": "97",
                },
                {
                    "ORG": "Rattus norvegicus",
                    "PROTGI": "116489",
                    "PROTID": "sp:P05943",
                    "PCT": "94",
                    "ALN": "94",
                },
            ]
        ]
        self._assert_records_equal(first.PROTSIM, exp_prot_sim)
        # SCOUNT is expected as an int
        self.assertEqual(first.SCOUNT, 5)

        # ---- first record: repeated SEQUENCE lines ----
        exp_seqs = [
            UniGeneSeqRecord(fields)
            for fields in [
                {
                    "ACC": "BC025044.1",
                    "NID": "g19263549",
                    "PID": "g19263550",
                    "SEQTYPE": "mRNA",
                },
                {
                    "ACC": "AA471893.1",
                    "NID": "g2199884",
                    "END": "5'",
                    "CLONE": "IMAGE:872193",
                    "LID": "539",
                    "SEQTYPE": "EST",
                },
                {
                    "ACC": "AI842963.1",
                    "NID": "g5477176",
                    "CLONE": "UI-M-AO1-aem-f-10-0-UI",
                    "END": "3'",
                    "LID": "1944",
                    "SEQTYPE": "EST",
                    "TRACE": "158501677",
                },
                {
                    "ACC": "CB595147.1",
                    "NID": "g29513003",
                    "CLONE": "IMAGE:30300703",
                    "END": "5'",
                    "LID": "12885",
                    "MGC": "6677832",
                    "SEQTYPE": "EST",
                },
                {
                    "ACC": "BY144053.1",
                    "NID": "g26280109",
                    "CLONE": "L930184D22",
                    "END": "5'",
                    "LID": "12267",
                    "SEQTYPE": "EST",
                },
            ]
        ]
        self._assert_records_equal(first.SEQUENCE, exp_seqs)

        # ---- second record ----
        self.assertEqual(second.ID, "Mm.5")
        self.assertEqual(second.TITLE, "homeo box A10")
        self.assertEqual(second.GENE, "Hoxa10")
        self.assertEqual(second.CYTOBAND, "6 26.33 cM")
        self.assertEqual(second.LOCUSLINK, 15395)
        self.assertEqual(second.EXPRESS, ["kidney", "colon", "mammary gland"])
        self.assertEqual(second.CHROMOSOME, "6")
        self.assertEqual(
            second.PROTSIM,
            [
                UniGeneProtSimRecord(
                    {
                        "ORG": "Caenorhabditis elegans",
                        "PROTGI": "7510074",
                        "PROTID": "pir:T31611",
                        "PCT": "30",
                        "ALN": "326",
                    }
                )
            ],
        )
        self.assertEqual(second.SCOUNT, 1)
        # no STS line in the second record: an empty list is expected
        self.assertEqual(second.STS, [])
        self.assertEqual(
            second.SEQUENCE,
            [
                UniGeneSeqRecord(
                    {
                        "ACC": "AW990320.1",
                        "NID": "g8185938",
                        "CLONE": "IMAGE:1513482",
                        "END": "5'",
                        "LID": "1043",
                        "SEQTYPE": "EST",
                        "TRACE": "94472873",
                    }
                )
            ],
        )

        # test that the synonym mapping works OK: the value read through
        # SequenceIds / NucleotideId is the NID of the record's only
        # SEQUENCE line
        self.assertEqual(second.SequenceIds[0].NucleotideId, "g8185938")
