1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
|
# Copyright 2006-2014 by Peter Cock. All rights reserved.
# Revisions copyright 2011 Brandon Invergo. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for Bio.AlignIO.PhylipIO"""
import unittest
from Bio._py3k import StringIO
from Bio.AlignIO.PhylipIO import PhylipIterator, PhylipWriter
# Protein alignment fixture: the header line declares 8 sequences of 286
# columns.  The ten-character names appear only in the first block of eight
# lines; every later block of eight lines continues the same records in the
# same order.  Two records share the name "B_subtilis", and test_one expects
# that name to be reported twice.
phylip_text = """ 8 286
V_Harveyi_ --MKNWIKVA VAAIA--LSA A--------- ---------T VQAATEVKVG
B_subtilis MKMKKWTVLV VAALLAVLSA CG-------- ----NGNSSS KEDDNVLHVG
B_subtilis MKKALLALFM VVSIAALAAC GAGNDNQSKD NAKDGDLWAS IKKKGVLTVG
YA80_HAEIN MKKLLFTTAL LTGAIAFSTF ---------- -SHAGEIADR VEKTKTLLVG
FLIY_ECOLI MKLAHLGRQA LMGVMAVALV AG---MSVKS FADEG-LLNK VKERGTLLVG
E_coli_Gln --MKSVLKVS LAALTLAFAV S--------- ---------S HAADKKLVVA
Deinococcu -MKKSLLSLK LSGLLVPSVL ALS------- -LSACSSPSS TLNQGTLKIA
HISJ_E_COL MKKLVLSLSL VLAFSSATAA F--------- ---------- AAIPQNIRIG
MSGRYFPFTF VKQ--DKLQG FEVDMWDEIG KRNDYKIEYV TANFSGLFGL
ATGQSYPFAY KEN--GKLTG FDVEVMEAVA KKIDMKLDWK LLEFSGLMGE
TEGTYEPFTY HDKDTDKLTG YDVEVITEVA KRLGLKVDFK ETQWGSMFAG
TEGTYAPFTF HDK-SGKLTG FDVEVIRKVA EKLGLKVEFK ETQWDAMYAG
LEGTYPPFSF QGD-DGKLTG FEVEFAQQLA KHLGVEASLK PTKWDGMLAS
TDTAFVPFEF KQG--DKYVG FDVDLWAAIA KELKLDYELK PMDFSGIIPA
MEGTYPPFTS KNE-QGELVG FDVDIAKAVA QKLNLKPEFV LTEWSGILAG
TDPTYAPFES KNS-QGELVG FDIDLAKELC KRINTQCTFV ENPLDALIPS
LETGRIDTIS NQITMTDARK AKYLFADPYV VDG-AQITVR KGNDSIQGVE
LQTGKLDTIS NQVAVTDERK ETYNFTKPYA YAG-TQIVVK KDNTDIKSVD
LNSKRFDVVA NQVG-KTDRE DKYDFSDKYT TSR-AVVVTK KDNNDIKSEA
LNAKRFDVIA NQTNPSPERL KKYSFTTPYN YSG-GVIVTK SSDNSIKSFE
LDSKRIDVVI NQVTISDERK KKYDFSTPYT ISGIQALVKK GNEGTIKTAD
LQTKNVDLAL AGITITDERK KAIDFSDGYY KSG-LLVMVK ANNNDVKSVK
LQANKYDVIV NQVGITPERQ NSIGFSQPYA YSRPEIIVAK NNTFNPQSLA
LKAKKIDAIM SSLSITEKRQ QEIAFTDKLY AADSRLVVAK NSDIQP-TVE
DLAGKTVAVN LGSNFEQLLR DYDKDGKINI KTYDT--GIE HDVALGRADA
DLKGKTVAAV LGSNHAKNLE SKDPDKKINI KTYETQEGTL KDVAYGRVDA
DVKGKTSAQS LTSNYNKLAT N----AGAKV EGVEGMAQAL QMIQQARVDM
DLKGRKSAQS ATSNWGKDAK A----AGAQI LVVDGLAQSL ELIKQGRAEA
DLKGKKVGVG LGTNYEEWLR QNV--QGVDV RTYDDDPTKY QDLRVGRIDA
DLDGKVVAVK SGTGSVDYAK AN--IKTKDL RQFPNIDNAY MELGTNRADA
DLKGKRVGST LGSNYEKQLI DTG---DIKI VTYPGAPEIL ADLVAGRIDA
SLKGKRVGVL QGTTQETFGN EHWAPKGIEI VSYQGQDNIY SDLTAGRIDA
FIMDRLSALE -LIKKT-GLP LQLAGEPFET I-----QNAW PFVDNEKGRK
YVNSRTVLIA -QIKKT-GLP LKLAGDPIVY E-----QVAF PFAKDDAHDK
TYNDKLAVLN -YLKTSGNKN VKIAFETGEP Q-----STYF TFRKGS--GE
TINDKLAVLD -YFKQHPNSG LKIAYDRGDK T-----PTAF AFLQGE--DA
ILVDRLAALD -LVKKT-NDT LAVTGEAFSR Q-----ESGV ALRKGN--ED
VLHDTPNILY -FIKTAGNGQ FKAVGDSLEA Q-----QYGI AFPKGS--DE
AYNDRLVVNY -IINDQ-KLP VRGAGQIGDA A-----PVGI ALKKGN--SA
AFQDEVAASE GFLKQPVGKD YKFGGPSVKD EKLFGVGTGM GLRKED--NE
LQAEVNKALA EMRADGTVEK ISVKWFGADI TK----
LRKKVNKALD ELRKDGTLKK LSEKYFNEDI TVEQKH
VVDQVNKALK EMKEDGTLSK ISKKWFGEDV SK----
LITKFNQVLE ALRQDGTLKQ ISIEWFGYDI TQ----
LLKAVNDAIA EMQKDGTLQA LSEKWFGADV TK----
LRDKVNGALK TLRENGTYNE IYKKWFGTEP K-----
LKDQIDKALT EMRSDGTFEK ISQKWFGQDV GQP---
LREALNKAFA EMRADGTYEK LAKKYFDFDV YGG---
"""
# Nucleotide fixture taken from the PhyML user guide:
# http://atgc.lirmm.fr/phyml/usersguide.html
# The header declares 5 sequences of 60 columns.  Each record is split over
# two blocks: 38 letters next to the name, then 22 more on an unnamed line.
phylip_text2 = """5 60
Tax1 CCATCTCACGGTCGGTACGATACACCTGCTTTTGGCAG
Tax2 CCATCTCACGGTCAGTAAGATACACCTGCTTTTGGCGG
Tax3 CCATCTCCCGCTCAGTAAGATACCCCTGCTGTTGGCGG
Tax4 TCATCTCATGGTCAATAAGATACTCCTGCTTTTGGCGG
Tax5 CCATCTCACGGTCGGTAAGATACACCTGCTTTTGGCGG
GAAATGGTCAATATTACAAGGT
GAAATGGTCAACATTAAAAGAT
GAAATCGTCAATATTAAAAGGT
GAAATGGTCAATCTTAAAAGGT
GAAATGGTCAATATTAAAAGGT"""
# Same five records as phylip_text2, but with each full 60-letter sequence on
# a single line.  test_two_and_three checks that both fixtures parse to the
# same IDs and sequences.
phylip_text3 = """5 60
Tax1 CCATCTCACGGTCGGTACGATACACCTGCTTTTGGCAGGAAATGGTCAATATTACAAGGT
Tax2 CCATCTCACGGTCAGTAAGATACACCTGCTTTTGGCGGGAAATGGTCAACATTAAAAGAT
Tax3 CCATCTCCCGCTCAGTAAGATACCCCTGCTGTTGGCGGGAAATCGTCAATATTAAAAGGT
Tax4 TCATCTCATGGTCAATAAGATACTCCTGCTTTTGGCGGGAAATGGTCAATCTTAAAAGGT
Tax5 CCATCTCACGGTCGGTAAGATACACCTGCTTTTGGCGGGAAATGGTCAATATTAAAAGGT"""
# Nucleotide fixture taken from the PHYLIP documentation:
# http://evolution.genetics.washington.edu/phylip/doc/sequence.html
# Note the lack of any white space between names 2 and 3 and their seqs.
# The header declares 5 sequences of 42 columns; the named lines are followed
# by a second block of five unnamed continuation lines.
phylip_text4 = """ 5 42
Turkey AAGCTNGGGC ATTTCAGGGT
Salmo gairAAGCCTTGGC AGTGCAGGGT
H. SapiensACCGGTTGGC CGTTCAGGGT
Chimp AAACCCTTGC CGTTACGCTT
Gorilla AAACCCTTGC CGGTACGCTT
GAGCCCGGGC AATACAGGGT AT
GAGCCGTGGC CGGGCACGGT AT
ACAGGTTGGC CGTTCAGGGT AA
AAACCGAGGC CGGGACACTC AT
AAACCATTGC CGGTACGCTT AA"""
# Nucleotide fixture taken from the PHYLIP documentation:
# http://evolution.genetics.washington.edu/phylip/doc/sequence.html
# Here each record's continuation line follows directly after its own named
# line instead of being grouped into a later block.  test_five expects
# PhylipIterator to raise ValueError when given this layout.
phylip_text5 = """ 5 42
Turkey AAGCTNGGGC ATTTCAGGGT
GAGCCCGGGC AATACAGGGT AT
Salmo gairAAGCCTTGGC AGTGCAGGGT
GAGCCGTGGC CGGGCACGGT AT
H. SapiensACCGGTTGGC CGTTCAGGGT
ACAGGTTGGC CGTTCAGGGT AA
Chimp AAACCCTTGC CGTTACGCTT
AAACCGAGGC CGGGACACTC AT
Gorilla AAACCCTTGC CGGTACGCTT
AAACCATTGC CGGTACGCTT AA"""
# Same records as phylip_text5 with each continuation joined onto its named
# line, so every record occupies exactly one line.  test_five_a expects this
# to parse as a single alignment, and test_write_read uses it as the input of
# a write/read round trip.
phylip_text5a = """ 5 42
Turkey AAGCTNGGGC ATTTCAGGGT GAGCCCGGGC AATACAGGGT AT
Salmo gairAAGCCTTGGC AGTGCAGGGT GAGCCGTGGC CGGGCACGGT AT
H. SapiensACCGGTTGGC CGTTCAGGGT ACAGGTTGGC CGTTCAGGGT AA
Chimp AAACCCTTGC CGTTACGCTT AAACCGAGGC CGGGACACTC AT
Gorilla AAACCCTTGC CGGTACGCTT AAACCATTGC CGGTACGCTT AA"""
class TestPhylipIO(unittest.TestCase):
    """Parsing and round-trip tests for PhylipIterator and PhylipWriter.

    Every test feeds one or more of the module-level ``phylip_text*``
    fixtures to ``PhylipIterator`` through an in-memory ``StringIO`` handle.
    """

    def _parse_all(self, text):
        """Return the list of alignments PhylipIterator yields for ``text``.

        The temporary handle is always closed, including when parsing raises,
        so individual tests do not have to manage it themselves.
        """
        handle = StringIO(text)
        try:
            return list(PhylipIterator(handle))
        finally:
            handle.close()

    def test_one(self):
        """Check the record IDs and the final sequence of phylip_text."""
        # Flatten explicitly rather than relying on a loop variable that is
        # still bound after the loop: if nothing was parsed, the ID assertion
        # below fails cleanly instead of a NameError being raised later.
        records = [record
                   for alignment in self._parse_all(phylip_text)
                   for record in alignment]
        ids = [record.id for record in records]
        self.assertEqual(ids, ['V_Harveyi_', 'B_subtilis', 'B_subtilis',
                               'YA80_HAEIN', 'FLIY_ECOLI', 'E_coli_Gln',
                               'Deinococcu', 'HISJ_E_COL'])
        # Layout of the literal is irrelevant: spaces and newlines are
        # stripped before the comparison.
        expected = """mkklvlslsl vlafssataa faaipqniri gtdptyapfe sknsqgelvg
fdidlakelc krintqctfv enpldalips lkakkidaim sslsitekrq qeiaftdkly
aadsrlvvak nsdiqptves lkgkrvgvlq gttqetfgne hwapkgieiv syqgqdniys
dltagridaafqdevaaseg flkqpvgkdy kfggpsvkde klfgvgtgmg lrkednelre
alnkafaemradgtyeklak kyfdfdvygg""".replace(" ", "").replace("\n", "").upper()
        # Compare the ungapped sequence of the last record parsed.
        self.assertEqual(str(records[-1].seq).replace("-", ""), expected)

    def test_two_and_three(self):
        """Check phylip_text2 and phylip_text3 parse to the same records."""
        list2 = self._parse_all(phylip_text2)
        self.assertEqual(len(list2), 1)
        self.assertEqual(len(list2[0]), 5)
        list3 = self._parse_all(phylip_text3)
        self.assertEqual(len(list3), 1)
        self.assertEqual(len(list3[0]), 5)
        # Both alignments were just asserted to hold five records, so the
        # lockstep walk covers every record of each.
        for record2, record3 in zip(list2[0], list3[0]):
            self.assertEqual(record2.id, record3.id)
            self.assertEqual(str(record2.seq), str(record3.seq))

    def test_four(self):
        """Check phylip_text4 parses as one alignment of five records."""
        list4 = self._parse_all(phylip_text4)
        self.assertEqual(len(list4), 1)
        self.assertEqual(len(list4[0]), 5)

    def test_five(self):
        """Check parsing phylip_text5 raises ValueError."""
        self.assertRaises(ValueError, self._parse_all, phylip_text5)

    def test_five_a(self):
        """Check phylip_text5a parses as a single alignment."""
        list5 = self._parse_all(phylip_text5a)
        self.assertEqual(len(list5), 1)

    def test_concatenation(self):
        """Check concatenated fixtures give one alignment per fixture."""
        doubled = phylip_text4 + "\n" + phylip_text4
        self.assertEqual(len(self._parse_all(doubled)), 2)
        # Extra blank lines between the files must not create or hide
        # alignments.
        mixed = phylip_text3 + "\n" + phylip_text4 + "\n\n\n" + phylip_text
        self.assertEqual(len(self._parse_all(mixed)), 3)

    def test_write_read(self):
        """Check alignments survive a PhylipWriter/PhylipIterator round trip."""
        list5 = self._parse_all(phylip_text5a)
        handle = StringIO()
        try:
            PhylipWriter(handle).write_file(list5)
            # Rewind so the iterator reads back what was just written.
            handle.seek(0)
            list6 = list(PhylipIterator(handle))
        finally:
            handle.close()
        self.assertEqual(len(list5), len(list6))
        for a1, a2 in zip(list5, list6):
            self.assertEqual(len(a1), len(a2))
            for r1, r2 in zip(a1, a2):
                self.assertEqual(r1.id, r2.id)
                self.assertEqual(str(r1.seq), str(r2.seq))
# Run the tests with verbose (level 2) output when executed as a script.
if __name__ == "__main__":
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|