# Copyright 2009 by David Winter.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

import os
import sys
import unittest

from Bio import MissingExternalDependencyError
from Bio import AlignIO
from Bio.Nexus import Trees  # One day we should use planned TreeIO module

from Bio.Emboss.Applications import FDNADistCommandline, FNeighborCommandline
from Bio.Emboss.Applications import FSeqBootCommandline, FProtDistCommandline
from Bio.Emboss.Applications import FProtParsCommandline, FConsenseCommandline
from Bio.Emboss.Applications import FTreeDistCommandline, FDNAParsCommandline

# Try to avoid problems when the OS is in another language
os.environ['LANG'] = 'C'

# Names of the EMBOSS 'PhylipNew' programs that the tests below invoke.
exes_wanted = ['fdnadist', 'fneighbor', 'fprotdist', 'fprotpars', 'fconsense',
               'fseqboot', 'ftreedist', 'fdnapars']
exes = dict()  # Dictionary mapping from names to exe locations

if "EMBOSS_ROOT" in os.environ:
    # Windows default installation path is C:\mEMBOSS which contains the exes.
    # EMBOSS also sets an environment variable which we will check for.
    path = os.environ["EMBOSS_ROOT"]
    if os.path.isdir(path):
        for name in exes_wanted:
            candidate = os.path.join(path, name + ".exe")
            if os.path.isfile(candidate):
                exes[name] = candidate
        # The loop variables are only bound when the loop actually ran, so
        # they are removed here; deleting them unconditionally raised a
        # NameError whenever EMBOSS_ROOT did not point at a directory.
        del name, candidate
    del path
if sys.platform != "win32":
    from Bio._py3k import getoutput
    for name in exes_wanted:
        # This will "just work" if installed on the path as normal on Unix
        output = getoutput("%s -help" % name)
        if "not found" not in output and "not recognized" not in output:
            exes[name] = name
        del output
    del name

# Every wanted program must have been located, otherwise skip this module.
if len(exes) < len(exes_wanted):
    raise MissingExternalDependencyError(
        "Install the Emboss package 'PhylipNew' if you want to use the "
        "Bio.Emboss.Applications wrappers for phylogenetic tools.")

# #########################################################################


# A few top level functions that are called repeatedly in the test cases
def write_AlignIO_dna():
    """Convert Clustalw/opuntia.aln (clustal) to Phylip/opuntia.phy (phylip).

    Raises AssertionError unless AlignIO.convert reports exactly one
    converted alignment.
    """
    # Do the conversion outside of an assert statement: asserts are stripped
    # when Python runs with -O, which would silently skip writing the file.
    count = AlignIO.convert("Clustalw/opuntia.aln", "clustal",
                            "Phylip/opuntia.phy", "phylip")
    if count != 1:
        raise AssertionError("Expected to convert 1 alignment from "
                             "Clustalw/opuntia.aln, got %r" % (count,))


def write_AlignIO_protein():
    """Convert Clustalw/hedgehog.aln (clustal) to Phylip/hedgehog.phy (phylip).

    Raises AssertionError unless AlignIO.convert reports exactly one
    converted alignment.
    """
    # Do the conversion outside of an assert statement: asserts are stripped
    # when Python runs with -O, which would silently skip writing the file.
    count = AlignIO.convert("Clustalw/hedgehog.aln", "clustal",
                            "Phylip/hedgehog.phy", "phylip")
    if count != 1:
        raise AssertionError("Expected to convert 1 alignment from "
                             "Clustalw/hedgehog.aln, got %r" % (count,))


def clean_up():
    """Delete test files (to be used as tearDown() function in test fixtures).

    Removes the scratch output file and the two phylip files written by the
    AlignIO helpers; paths that are not existing regular files are ignored.
    """
    leftovers = ("test_file", "Phylip/opuntia.phy", "Phylip/hedgehog.phy")
    for stale in filter(os.path.isfile, leftovers):
        os.remove(stale)


def parse_trees(filename):
    """Yield a Trees.Tree for each tree description found in filename.

    Helper function until we have Bio.Phylo on trunk.

    The file content is split on ";\\n" and each non-blank piece is handed to
    Trees.Tree with the terminating semicolon restored.
    """
    # Read the file the caller asked for (previously "test_file" was always
    # read, whatever was passed in) and close the handle promptly.
    with open(filename, "r") as handle:
        data = handle.read()
    for tree_str in data.split(";\n"):
        # Skip the empty/whitespace-only remainder after the last tree
        if tree_str.strip():
            yield Trees.Tree(tree_str + ";")


class DistanceTests(unittest.TestCase):
    """Tests for calculating distance based phylogenetic trees with phylip"""

    def tearDown(self):
        clean_up()

    test_taxa = ['Archaeohip', 'Calippus', 'Hypohippus', 'M._secundu',
                 'Merychippu', 'Mesohippus', 'Nannipus', 'Neohippari',
                 'Parahippus', 'Pliohippus']

    def distances_from_alignment(self, filename, DNA=True):
        """check we can make distance matrix from a given alignment"""
        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
        if DNA:
            cline = FDNADistCommandline(exes["fdnadist"],
                                         method='j',
                                         sequence=filename,
                                         outfile="test_file",
                                         auto=True)
        else:
            cline = FProtDistCommandline(exes["fprotdist"],
                                         method='j',
                                         sequence=filename,
                                         outfile="test_file",
                                         auto=True)
        stdout, strerr = cline()
        # biopython can't grok distance matrices, so we'll just check it exists
        self.assertTrue(os.path.isfile("test_file"))

    def tree_from_distances(self, filename):
        """Check we can estimate a tree from a distance matrix"""
        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
        cline = FNeighborCommandline(exes["fneighbor"],
                                     datafile=filename,
                                     outtreefile="test_file",
                                     auto=True, filter=True)
        stdout, stderr = cline()
        for tree in parse_trees("test_file"):
            tree_taxa = [t.replace(" ", "_") for t in tree.get_taxa()]
            self.assertEqual(self.test_taxa, sorted(tree_taxa))

    def test_distances_from_phylip_DNA(self):
        """Calculate a distance matrix from an phylip alignment"""
        self.distances_from_alignment("Phylip/horses.phy")

    def test_distances_from_AlignIO_DNA(self):
        """Calculate a distance matrix from an alignment written by AlignIO"""
        write_AlignIO_dna()
        self.distances_from_alignment("Phylip/opuntia.phy")

    # def test_distances_from_bootstrapped_phylip_DNA(self):
    #    """Calculate a set of distance matrices from phylip alignments"""
    #    self.distances_from_alignment("Phylip/bs_horses.phy")

    # fprotdist tests
    def test_distances_from_protein_phylip(self):
        """Calculate a distance matrix from phylip protein alignment"""
        self.distances_from_alignment("Phylip/interlaced.phy", DNA=False)

    def test_distances_from_protein_AlignIO(self):
        """Calculate distance matrix from an AlignIO written protein alignment"""
        write_AlignIO_protein()
        self.distances_from_alignment("Phylip/hedgehog.phy", DNA=False)

    # def test_distances_from_bootstrapped_phylip_protein(self):
    #    """Calculate distance matrices from a bootstrapped protein alignment"""
    #    self.distances_from_alignment("Clustalw/bs_interlaced.phy", DNA=False)

    # fneighbor tests
    # def test_tree_from_distances(self):
    #    """Estimate tree from distance matrix and parse it."""
    #    self.tree_from_distances("Phylip/horses.fdnadist")

    # This one won't work because of a bug in EMBOSS 6.0.1
    # def test_tree_from_bootstrapped_distances(self):
    #    """Estimate tree from bootstrapped distance matrix and parse it"""
    #    self.tree_from_distances("Phylip/bs_horses.fdnadist")


class ParsimonyTests(unittest.TestCase):
    """Tests for estimating parsimony based phylogenetic trees with phylip"""

    def tearDown(self):
        clean_up()

    def parsimony_tree(self, filename, format, DNA=True):
        """Estimate a parsimony tree from an alignment"""
        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
        if DNA:
            cline = FDNAParsCommandline(exes["fdnapars"],
                                        sequence=filename,
                                        outtreefile="test_file",
                                        auto=True, stdout=True)
        else:
            cline = FProtParsCommandline(exes["fprotpars"],
                                         sequence=filename,
                                         outtreefile="test_file",
                                         auto=True, stdout=True)
        stdout, stderr = cline()
        a_taxa = [s.name.replace(" ", "_") for s in
                  next(AlignIO.parse(open(filename, "r"), format))]
        for tree in parse_trees("test_file"):
            t_taxa = [t.replace(" ", "_") for t in tree.get_taxa()]
            self.assertEqual(sorted(a_taxa), sorted(t_taxa))

    # fdnapars tests
    # def test_parsimony_tree_from_phylip_DNA(self):
    #    """Make a parsimony tree from a phylip DNA alignment"""
    #    self.parsimony_tree("Phylip/horses.phy", "phylip")

    def test_parsimony_tree_from_AlignIO_DNA(self):
        """Make a parsimony tree from an alignment written with AlignIO"""
        write_AlignIO_dna()
        self.parsimony_tree("Phylip/opuntia.phy", "phylip")

    # def test_parsimony_bootstrapped_phylip_DNA(self):
    #    """Make a parsimony tree from a bootstrapped phylip DNA alignment"""
    #    self.parsimony_tree("Phylip/bs_horses.phy", "phylip")

    # fprotpars tests
    # def test_parsimony_tree_from_phylip_protein(self):
    #    """Make a parsimony tree from a phylip DNA alignment"""
    #    self.parsimony_tree("Phylip/interlaced.phy", "phylip", DNA=False)

    def test_parsimony_from_AlignIO_protein(self):
        """Make a parsimony tree from protein alignment written with AlignIO"""
        write_AlignIO_protein()
        self.parsimony_tree("Phylip/interlaced.phy", "phylip", DNA=False)

    # def test_parsimony_tree_bootstrapped_phylip_protein(self):
    #    """Make a parsimony tree from a phylip DNA alignment"""
    #    self.parsimony_tree("Phylip/bs_interlaced.phy", "phylip", DNA=False)


class BootstrapTests(unittest.TestCase):
    """Tests for pseudosampling alignments with fseqboot"""

    def tearDown(self):
        clean_up()

    def check_bootstrap(self, filename, format, align_type="d"):
        """Check we can use fseqboot to pseudosample an alignment

        The align_type type argument is passed to the commandline object to
        set the output format to use (from [D]na,[p]rotein and [r]na )
        """
        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
        cline = FSeqBootCommandline(exes["fseqboot"],
                                    sequence=filename,
                                    outfile="test_file",
                                    seqtype=align_type,
                                    reps=2,
                                    auto=True, filter=True)
        stdout, stderr = cline()
        # the resultant file should have 2 alignments...
        bs = list(AlignIO.parse(open("test_file", "r"), format))
        self.assertEqual(len(bs), 2)
        # ..and each name in the original alignment...
        a_names = [s.name.replace(" ", "_") for s in
                   AlignIO.read(open(filename, "r"), format)]
        # ...should be in each alignment in the bootstrapped file
        for a in bs:
            self.assertEqual(a_names, [s.name.replace(" ", "_") for s in a])

    def test_bootstrap_phylip_DNA(self):
        """Pseudosample a phylip DNA alignment"""
        self.check_bootstrap("Phylip/horses.phy", "phylip")

    def test_bootstrap_AlignIO_DNA(self):
        """Pseudosample a phylip DNA alignment written with AlignIO"""
        write_AlignIO_dna()
        self.check_bootstrap("Phylip/opuntia.phy", "phylip")

    def test_bootstrap_phylip_protein(self):
        """Pseudosample a phylip protein alignment"""
        self.check_bootstrap("Phylip/interlaced.phy", "phylip", "p")

    def test_bootstrap_AlignIO_protein(self):
        """Pseudosample a phylip protein alignment written with AlignIO"""
        write_AlignIO_protein()
        self.check_bootstrap("Phylip/hedgehog.phy", "phylip", "p")


class TreeComparisonTests(unittest.TestCase):
    """Tests for comparing phylogenetic trees with phylip tools"""

    def tearDown(self):
        clean_up()

    def test_fconsense(self):
        """Calculate a consensus tree with fconsense"""
        cline = FConsenseCommandline(exes["fconsense"],
                                     intreefile="Phylip/horses.tree",
                                     outtreefile="test_file",
                                     auto=True, filter=True)
        stdout, stderr = cline()
        # Split the next and get_taxa into two steps to help 2to3 work
        tree1 = next(parse_trees("test_file"))
        taxa1 = tree1.get_taxa()
        for tree in parse_trees("Phylip/horses.tree"):
            taxa2 = tree.get_taxa()
            self.assertEqual(sorted(taxa1), sorted(taxa2))

    def test_ftreedist(self):
        """Calculate the distance between trees with ftreedist"""
        cline = FTreeDistCommandline(exes["ftreedist"],
                                     intreefile="Phylip/horses.tree",
                                     outfile="test_file",
                                     auto=True, filter=True)
        stdout, stderr = cline()
        self.assertTrue(os.path.isfile("test_file"))

if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    # unittest.main() finishes by raising SystemExit, so code placed after it
    # never runs; the finally clause makes sure the clean up still happens
    # while leaving the exit status untouched.
    try:
        unittest.main(testRunner=runner)
    finally:
        clean_up()
