1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
# Copyright 2006-2014 by Peter Cock. All rights reserved.
# Revisions copyright 2011 Brandon Invergo. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for Bio.AlignIO.MauveIO module."""
import unittest
import os
from io import StringIO
from Bio.AlignIO.MauveIO import MauveIterator, MauveWriter
from Bio import SeqIO
class TestMauveIO(unittest.TestCase):
MAUVE_TEST_DATA_DIR = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "Mauve"
)
SIMPLE_XMFA = os.path.join(MAUVE_TEST_DATA_DIR, "simple.xmfa")
SIMPLE_FA = os.path.join(MAUVE_TEST_DATA_DIR, "simple.fa")
def test_one(self):
ids = []
with open(self.SIMPLE_XMFA) as handle:
for alignment in MauveIterator(handle):
for record in alignment:
ids.append(record.id)
self.assertEqual(
ids,
[
"1/0-5670",
"2/0-5670",
"1/5670-9940",
"2/7140-11410",
"1/9940-14910",
"2/5670-7140",
"2/11410-12880",
],
)
expected = """ATTCGCACAT AAGAATGTAC CTTGCTGTAA TTTATACTCA
GCAGGTGGTG CAGACATCAT AACAAAAGAA GACTCTTGTT GTACTAGATA TTGTGTAGCA
TCACGACCAC ACACACATGG AATGGAAACA CCTGTCTTAA GATTATCATA AGATAGAGTA
CCCATATACA TCACAGCTTC TACACCCGTT AAGGTAGTAG TTTTCTGACC ACAATGTTTA
CACACCACAT TAAGAACTCG CTTTGCAGAT TCCAAATTAG CATGCTGTAG AAGATGGGTC
ATAGTTTCTC TGACATCACC AAGCTCGCCA ACAGTTTTAT TACTGTAAGC GAGTATGAGT
GCACAAAAGT TAGCAGCATC ACCAGCACGG GCTCTATAAT AAGCCTCTTG AAGTGCTGGT
GCATTGAATT TGACTTCAAG CTGTTGAAGT GCTAATAAAA CACTAGACAA ATAACAATTG
TTATCAGCCC ATTTAATTGA AGTTAAACCA CCAACTTGAG GAAATTTCCA TTTCTTTGTG
TGGTTTAAAG CAGACATGTA CCTACCAAGA AAACTCTCAT CAAGAGTATG GTAGTACTCG
AAAGCTTCAC TACGTAGTGT GTCATCACTA GGTAGTACAA AGAAAGTCTT ACCCTCATGA
TTTACATGAG GTTTAATTTT TGTAACATCA GCACCATCCA AGTATGTTGG ACCAAACTGC
TGTCCATATG TCATAGACAT ATCCACAAGC TGTGTGTGGA GATTAGTGTT GTCCACAGTT
GTGAACACTT TTATAGTCTT AACCTCCCGC AGGGATAAGA GACTCTTTAG TTTGTCAAGT
GAAAGAACCT CACCGTCAAG ATGAAACTCG ACGGGGCTCT CCAGAGTGTG GTACACAATT
TTGTCACCAC GCTTAAGAAA TTCAACACCT AACTCTGTAC GCTGTCCTGA ATAGGACCAA
TCTCTGTAAG AGCCAGCCAA AGAAACTGTT TCTACAAAGT GCTCCTCAGA TGTCTTTGAT
GACGAAGTGA GGTATCCATT ATATGTAGTA ACAGCATCTG GTGATGATAC TGACACTACG
GCAGGAGCTT TAAGAGAACG CATACAGCGC GCAGCCTCTT CAAGATTAAA ACCATGTGTC
ACATAACCAA TTGGCATTGT GACAAGCGGC TCATTTAGAG AGTTCAGCTT CGTAATAATA
GAAGCTACAG GCTCTTTACT AGTATAAAAG AAGAATCGGA CACCATAGTC AACGATGCCC
TCTTGAATTT TAATTCCTTT ATACTTACGT TGGATGGTTG CCATTATGGC TCTAACATCC
ATGCATATAG GCATTAATTT TCTTGTCTCT TCAGCATGAG CAAGCATTTC TCTCAAATTC
CAGGATACAG TTCCTAGAAT CTCTTCCTTA GCATTAGGTG CTTCTGAAGG TAGTACATAA
AATGCAGATT TGCATTTCTT AAGAGCAGTC TTAGCTTCCT CAAGTGTATA """
self.assertEqual(
str(record.seq).replace("-", ""),
expected.replace(" ", "").replace("\n", ""),
)
def test_sequence_positions(self):
with open(self.SIMPLE_FA) as handle:
seqs = list(SeqIO.parse(handle, "fasta"))
with open(self.SIMPLE_XMFA) as handle:
aln_list = list(MauveIterator(handle))
for aln in aln_list:
for record in aln:
if not str(record.seq).startswith("-"):
expected = str(record.seq)[0:10]
# seqs 0, 1 are ids 1, 2
actual = seqs[int(record.name) - 1].seq
# Slice out portion mentioned in file
actual = actual[
record.annotations["start"] : record.annotations["end"]
]
if record.annotations["strand"] < 0:
actual = actual.reverse_complement()
# Slice first 10 chars for comparison, don't want to
# get any '-'s by accident
actual = actual[0:10]
if len(actual) == 0:
# We can't test sequences which don't provide
# proper annotation start/end/strand information
continue
self.assertEqual(expected, actual)
def test_write_read(self):
with open(self.SIMPLE_XMFA) as handle:
aln_list = list(MauveIterator(handle))
handle = StringIO()
MauveWriter(handle).write_file(aln_list)
handle.seek(0)
aln_list_out = list(MauveIterator(handle))
for a1, a2 in zip(aln_list, aln_list_out):
self.assertEqual(len(a1), len(a2))
for r1, r2 in zip(a1, a2):
self.assertEqual(r1.id, r2.id)
self.assertEqual(str(r1.seq), str(r2.seq))
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)
unittest.main(testRunner=runner)
|