File: test_formatdb.py

package info (click to toggle)
python-cogent 1.9-14
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 19,752 kB
  • sloc: python: 137,485; makefile: 149; sh: 64
file content (118 lines) | stat: -rwxr-xr-x 22,700 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# Author: Greg Caporaso (gregcaporaso@gmail.com)
# test_formatdb.py

""" Description
File created on 16 Sep 2009.

"""
from __future__ import division
from os.path import split, exists
from cogent import LoadSeqs
from cogent.util.unit_test import TestCase, main
from cogent.app.util import get_tmp_filename
from cogent.util.misc import remove_files
from cogent.app.blast import blastn
from cogent.app.formatdb import FormatDb, build_blast_db_from_seqs,\
    build_blast_db_from_fasta_path, build_blast_db_from_fasta_file

__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Production"

class FormatDbTests(TestCase):
    
    def setUp(self):
        self.in_seqs1_fp =\
         get_tmp_filename(prefix='FormatDbTests',suffix='.fasta')
        self.in_seqs1_file = open(self.in_seqs1_fp,'w')
        self.in_seqs1_file.write(in_seqs1)
        self.in_seqs1_file.close()
        self.in_seqs1 = LoadSeqs(self.in_seqs1_fp,aligned=False)
        self.test_seq = test_seq
        
        self.in_aln1_fp =\
         get_tmp_filename(prefix='FormatDbTests',suffix='.fasta')
        self.in_aln1_file = open(self.in_aln1_fp,'w')
        self.in_aln1_file.write(in_aln1)
        self.in_aln1_file.close()
        self.in_aln1 = LoadSeqs(self.in_aln1_fp)
        
        
        self.files_to_remove = [self.in_seqs1_fp,self.in_aln1_fp]
    
    def tearDown(self):
        remove_files(self.files_to_remove)
        
    def test_call(self):
        """FormatDb: Calling on a nucleotide data functions as expected
        """
        fdb = FormatDb(WorkingDir='/tmp')
        result = fdb(self.in_seqs1_fp)
        
        # test sucessful run
        self.assertEqual(result['ExitStatus'],0)
        
        expected_result_keys = set(\
         ['log','nhr','nin','nsd','nsi','nsq','ExitStatus','StdOut','StdErr'])
        self.assertEqual(set(result.keys()),expected_result_keys)
        
        inputfile_basename = split(self.in_seqs1_fp)[1]
        # got all the expected out files, and filepaths are as expected
        outpaths = []
        for ext in ['log','nhr','nin','nsd','nsi','nsq']:
            outpath = '/tmp/%s.%s' % (inputfile_basename,ext)
            outpaths.append(outpath)
            self.assertEqual(result[ext].name,outpath)
        result.cleanUp()
        
        # all created files are cleaned up
        for outpath in outpaths:
            self.assertFalse(exists(outpath),\
             "%s was not cleaned up." % outpath)
    

in_seqs1 = """>11472286
GATGAACGCTGGCGGCATGCTTAACACATGCAAGTCGAACGGAACACTTTGTGTTTTGAGTTAATAGTTCGATAGTAGATAGTAAATAGTGAACACTATGAACTAGTAAACTATTTAACTAGAAACTCTTAAACGCAGAGCGTTTAGTGGCGAACGGGTGAGTAATACATTGGTATCTACCTCGGAGAAGGACATAGCCTGCCGAAAGGTGGGGTAATTTCCTATAGTCCCCGCACATATTTGTTCTTAAATCTGTTAAAATGATTATATGTTTTATGTTTATTTGATAAAAAGCAGCAAGACAAATGAGTTTTATATTGGTTATACAGCAGATTTAAAAAATAGAATTAGGTCTCATAATCAGGGAGAAAACAAATCAACTAAATCTAAAATACCTTGGGAATTGGTTTACTATGAAGCCTACAAAAACCAAACATCAGCAAGGGTTAGAGAATCAAAGTTGAAACATTATGGGCAATCATTAACTAGACTTAAGAGAAGAATTGGTTTTTGAGAACAAATATGTGCGGGGTAAAGCAGCAATGCGCTCCGAGAGGAACCTCTGTCCTATCAGCTTGTTGGTAAGGTAATGGCTTACCAAGGCGACGACGGGTAGCTGGTGTGAGAGCACGACCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGAGGAATTTTCCACAATGGGCGCAAGCCTGATGGAGCAATGCCGCGTGAAGGATGAAGATTTTCGGATTGTAAACTTCTTTTAAGTAGGAAGATTATGACGGTACTACTTGAATAAGCATCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGATGCAAGCGTTATCCGGAATTACTGGGCGTAAAGCGTGTGTAGGTGGTTTATTAAGTTAAATGTTAAATTTTCAGGCTTAACTTGGAAACCGCATTTAATACTGGTAGACTTTGAGGACAAGAGAGGCAGGCGGAATTAGCGGAGTAGCGGTGAAATGCGTAGATATCGCTAAGAACACCAATGGCGAAGGCAGCCTGCTGGTTTGCACCTGACACTGAGATACGAAAGCGTGGGGAGCGAACGGGATTAGATACCCCGGTAGTCCACGCCGTAAACGATGGTCACTAGCTGTTAGGGGCTCGACCCCTTTAGTAGCGAAGCTAACGCGTTAAGTGACCCGCCTGGGGAGTACGATCGCAAGATTAAAACTCAAAGGAATTGACGGGGACCCGCACAAGCGGTGGAACGTGAGGTTTAATTCGTCTCTAAGCGAAAAACCTTACCGAGGCTTGACATCTCCGGAAGACCTTAGAAATAAGGTTGTGCCCGAAAGGGAGCCGGATGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTTGTGAAATGTTCGGTTAAGTCCGTTAACGAGCGCAACCCTTGCTGTGTGTTGTATTTTTCACACAGGACTATCCTGGTCAACAGGGAGGAAGGTGGGGATGACGTCAAGTCAGCATGGCTCTTACGCCTCGGGCTACACTCGCGTTACAATGGCCGGTACAATGGGCTGCCAACTCGTAAGGGGGAGCTAATCCCATCAAAACCGGTCCCAGTTCGGATTGAGGGCTGCAATTCGCCCTCATGAAGTCGGAATCGCTAGTAACCGCGAATCAGCACGTCGCGGTGAATGCGTTCTCGGGTCTTGTACACACTGCCCGTCACACCACGAAAGTTAGTAACGCCCGAAGTGCCCTGTATGGGGTCCTAAGGTGGGGCTAGCGATTGGGGTG
>11472384
AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCATGCCTTACACATGCAAGTCGAACGGCAGCACGGGGGCAACCCTGGTGGCGAGTGGCGAACGGGTGAGTAATACATCGGAACGTGTCCTGTAGTGGGGGATAGCCCGGCGAAAGCCGGATTAATACCGCATACGCTCTACGGAGGAAAGGGGGGGATCTTAGGACCTCCCGCTACAGGGGCGGCCGATGGCAGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCTGTAGCTGGTCTGAGAGGACGACCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTTGTCCGGAAAGAAAACGCCGTGGTTAATACCCGTGGCGGATGACGGTACCGGAAGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTCCGCTAAGACAGATGTGAAATCCCCGGGCTTAACCTGGGAACTGCATTTGTGACTGGCGGGCTAGAGTATGGCAGAGGGGGGTAGAATTCCACGTGTAGCAGTGAAATGCGTAGAGATGTGGAGGAATACCGATGGCGAAGGCAGCCCCCTGGGCCAATACTGACGCTCATGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGTCAACTAGTTGTCGGGTCTTCATTGACTTGGTAACGTAGCTAACGCGTGAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGGGACCCGCACAAGCGGTGGATGATGTGGATTAATTCGATGCAACGCGAAAAACCTTACCTACCCTTGACATGTATGGAATCCTGCTGAGAGGTGGGAGTGCCCGAAAGGGAGCCATAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCCCTAGTTGCTACGCAAGAGCACTCTAGGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGGGTAGGGCTTCACACGTCATACAATGGTCGGAACAGAGGGTCGCCAACCCGCGAGGGGGAGCCAATCCCAGAAAACCGATCGTAGTCCGGATCGCACTCTGCAACTCGAGTGCGTGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTTTACCAGAAGTGGCTAGTCTAACCGCAAGGAGGACGGTCACCACGGTAGGATTCATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTCTCGAGCGAACGTGTCGAACGTTGAGCGCTCACGCTTATCGGCTGTGAAATTAGGACAGTAAGTCAGACAGACTGAGGGGTCTGTAGCTCAGTCGGTTAGAGCACCGTCTTGATAAGGCGGGGGTCGATGGTTCGAATCCATCCAGACCCACCATTGTCT
>11468680
TAAACTGAAGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCATGCCTTACACATGCAAGTCGAACGGCAGCACGGGTGCTTGCACCTGGTGGCGAGTGGCGAACGGGTGAGTAATACATCGGAACATGTCCTGTAGTGGGGGATAGCCCGGCGAAAGCCGGATTAATACCGCATACGATCTACGGATGAAAGCGGGGGACCTTCGGGCCTCGCGCTATAGGGTTGGCCGATGGCTGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCAGTAGCTGGTCTGAGAGGACGACCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTTGTCCGGAAAGAAATCCTTGGCTCTAATACAGTCGGGGGATGACGGTACCGGAAGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTTTGCTAAGACCGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTGGTGACTGGCAGGCTAGAGTATGGCAGAGGGGGGTAGAATTCCACGTGTAGCAGTGAAATGCGTAGAGATGTGGAGGAATACCGATGGCGAAGGCAGCCCCCTGGGCCAATACTGACGCTCATGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGTCAACTAGTTGTTGGGGATTCATTTCCTTAGTAACGTAGCTAACGCGTGAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGGGACCCGCACAAGCGGTGGATGATGTGGATTAATTCGATGCAACGCGAAAAACCTTACCTACCCTTGACATGGTCGGAATCCCGCTGAGAGGTGGGAGTGCTCGAAAGAGAACCGGCGCACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCCTTAGTTGCTACGCAAGAGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGGGTAGGGCTTCACACGTCATACAATGGTCGGAACAGAGGGTTGCCAACCCGCGAGGGGGAGCTAATCCCAGAAAACCGATCGTAGTCCGGATTGCACTCTGCAACTCGAGTGCATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTTTACCAGAAGTGGCTAGTCTAACCGCAAGGAGGACGGTCACCACGGTAGGATTCATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTCCAGAGCTATCTCGCAAAGTTGAGCGCTCACGCTTATCGGCTGTAAATTTAAAGACAGACTCAGGGGTCTGTAGCTCAGTCGGTTAGAGCACCGTCTTGATAAGGCGGGGGTCGTTGGTTCGAATCCAACCAGACCCACCATTGTCTG
>11458037
GACGAACGCTGGCGGCGTGCCTAACACATGCAAGTCGAACGGTTTCGAAGATCGGACTTCGAATTTCGAATTTCGATCATCGAGATAGTGGCGGACGGGTGAGTAACGCGTGGGTAACCTACCCATAAAGCCGGGACAACCCTTGGAAACGAGGGCTAATACCGGATAAGCTTGAGAAGTGGCATCACTTTTTAAGGAAAGGTGGCCGATGAGAATGCTGCCGATTATGGATGGACCCGCGTCTGATTAGCTGGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCAGTAGCCGGCCTGAGAGGGTGAACGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGTATGATGAAGGTTTTCGGATTGTAAAGTACTGTCTATGGGGAAGAATGGTGTGCTTGAGAATATTAAGTACAAATGACGGTACCCAAGGAGGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCGCGTAGGCGGATAGTTAAGTCCGGTGTGAAAGATCAGGGCTCAACCCTGAGAGTGCATCGGAAACTGGGTATCTTGAGGACAGGAGAGGAAAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGACTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCTGTAAACGATGAGTGCTAGGTGTAGAGGGTATCGACCCCTTCTGTGCCGCAGTTAACACAATAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCAGGGCTTGACATCCTCTGAACTTGCTGGAAACAGGAAGGTGCCCTTCGGGGAGCAGAGAGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAATCCCGCAACGAGCGCAACCCCTGTATTTAGTTGCTAACGCGTAGAGGCGAGCACTCTGGATAGACTGCCGGTGATAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGTTCTGGGCTACACACGTGCTACAATGGCCGGTACAGACGGAAGCGAAGCCGCGAGGCGGAGCAAATCCGAGAAAGCCGGTCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCATGAAGTCGGAATCGCTAGTAATCGCAGGTCAGCATACTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAAAGTCTGCAACACCCGAAGCCGGTGAGGTAACCGACTCGAGATTCGAGGCTCGAAGTTCGAGGATCGAAGTGTAAGCGAAATTAATAAGTCTTAGTAAAGCTAAAAAGCATTAAGACCGATAAGATGATCTTGCAATCGAACATCGAACATCGAATTTCGAACCTCGAGTTGGAGCTAGCCGTCGAAGGTGGGGCCGATAATTGGGGTG
>11469739
AGAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGCCTAACACATGCAAGTCGAACGAGAAGCTAACTTCTGATTCCTTCGGGATGATGAGGTTAGCAGAAAGTGGCGAACGGGTGAGTAACGCGTGGGTAATCTACCCTGTAAGTGGGGGATAACCCTCCGAAAGGAGGGCTAATACCGCATAATATCTTTATCCCAAAAGAGGTAAAGATTAAAGATGGCCTCTATACTATGCTATCGCTTCAGGATGAGTCCGCGTCCTATTAGTTAGTTGGTGGGGTAATGGCCTACCAAGACGACAATGGGTAGCCGGTCTGAGAGGATGTACGGCCACACTGGGACTGAGATACGGCCCAGACTCCTACGGGAGACAGCAGTGGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCGACGCCGCGTGGATGATGAAGGCCCTTGGGTTGTAAAATCCTGTTCTGGGGGAAGAAAGCTTAAAGGTCCAATAAACCCTTAAGCCTGACGGTACCCCAAGAGAAAGCTCCGGCTAATTATGTGCCAGCAGCCGCGGTAATACATAAGGAGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTCTTAAAAGTCAGTTGTGAAATTATCAGGCTCAACCTGATAAGGTCATCTGAAACTCTAAGACTTGAGGTTAGAAGAGGAAAGTGGAATTCCCGGTGTAGCGGTGAAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGTCTATCTCTGACGCTGAGGAGCGAAAGCTAGGGGAGCAAACGGGATTAGATACCCCGGTAGTCCTAGCTGTAAACGATGGATACTAGGTGTGGGAGGTATCGACCCCTTCTGTGCCGTAGCTAACGCATTAAGTATCCCGCCTGGGGAGTACGGTCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCGGGACTTGACATTATCTTGCCCGTCTAAGAAATTAGATCTTCTTCCTTTGGAAGACAGGATAACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCACAACGAGCGCAACCCTTGTGCTTAGTTGCTAACTTGTTTTACAAGTGCACTCTAGGCAGACTGCCGCAGATAATGCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTACGTCCCGGGCTACACACGTGCTACAATGGCCTGTACAGAGGGTAGCGAAAGAGCGATCTTAAGCCAATCCCAAAAAGCAGGCCCCAGTTCGGATTGGAGGCTGCAACTCGCCTCCATGAAGTAGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAAAGTTGGCGATACCTGAAGTTACTAGGCTAACCTGGCACTCAACTAAGTTCACTAACTTATTTGCTTAAAATAAGGCTTAATGTGCTTAGTTGAGTGCCGGGAGGCAGGTACCGAAGGTATGGCTGGCGATTGGGGTGAAGTCGTAACAAGGTGGAAA
>11469752
AGAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGCAGCGAGTTCCTCACCGAGGTTCGGAACAGTTGACAGTAAACAGTTGACAGTAAACAGTAACTTCAGAAATGAAGCGGACTGTGAACTGTTTACTGTAACCTGTTAGCTATTATTTCGAGCTTTAGTGAGGAATGTCGGCGAGCGGCGGACGGCTGAGTAACGCGTAGGAACGTACCCCAAACTGAGGGATAAGCACCAGAAATGGTGTCTAATACCGCATATGGCCCAGCACCTTTTTTAATCAACCACGACCCTAAAATCGTGAATAATTGGTAGGAAAAGGTGTTGGGTTAAAGCTTCGGCGGTTTGGGAACGGCCTGCGTATGATTAGCTTGTTGGTGAGGTAAAAGCTCACCAAGGCGACGATCATTAGCTGGTCTGAGAGGATGATCAGCCAGACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGGCGAAAGCCTGATGGAGCAACGCCGTGTGCAGGATGAAAGCCTTCGGGTCGTAAACTGCTTTTATATGTGAAGACTTCGACGGTAGCATATGAATAAGGATCGGCTAACTCCGTGCCAGCAGCCGCGGTCATACGGAGGATCCAAGCGTTATCCGGAATTACTGGGCGTAAAGAGTTGCGTAGGTGGCATAGTAAGTTGGTAGTGAAATTGTGTGGCTCAACCATACACCCATTACTAAAACTGCTAAGCTAGAGTATATGAGAGGTAGCTGGAATTCCTAGTGTAGGAGTGAAATCCGTANATATTAGGAGGAACACCGATGGCGTAGGCAGGCTACTGGCATATTACTGACACTAAGGCACGAAAGCGTGGGGAGCGAACGGGATTAGATACCCCGGTAGTCCACGCTGTAAACGATGGATGCTAGCTGTTATGAGTATCGACCCTTGTAGTAGCGAAGCTAACGCGTTAAGCATCCCGCCTGTGGAGTACGAGCGCAAGCTTAAAACATAAAGGAATTGACGGGGACCCGCACAAGCGGTGGAGCGTGTTGTTTAATTCGATGATAAGCGAAGAACCTTACCAAGGCTTGACATCCCTGGAATTTCTCCGAAAGGAGAGAGTGCCTTCGGGAATCAGGTGACAGGTGATGCATGGCCGTCGTCAGCTCGTGTCGTGAGATGTTTGGTTAAGTCCATTAACGAGCGCAACCCTTGTAAATAGTTGGATTTTTCTATTTAGACTGCCTCGGTAACGGGGAGGAAGGAGGGGATGATGTCAGGTCAGTATTTCTCTTACGCCTTGGGCTACAAACACGCTACAATGGCCGGTACAAAGGGCAGCCAACCCGCGAGGGGGAGCAAATCCCATCAAAGCCGGTCTCAGTTCGGATAGCAGGCTGAAATTCGCCTGCTTGAAGTCGGAATCGCTAGTAACGGTGAGTCAGCTATATTACCGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCAAGGCATGAAAGTCATCAATACCTGACGTCTGGATTTATTCTGGCCTAAGGTAGGGGCGATGATTGGGCCTAAGTCGTAACAAGGTAA
>11460523
AGAGTTTGATCCTGGCTCAGAACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGCGAAATCGGGCACTCAATTTTGCTTTTCAAACATTAACTGATGAAACGACCAGAGAGATTGTTCCAGTTTAAAGAGTGAAAAGCAGGCTTGAGTGCCTGAGAGTAGAGTGGCGCACGGGTGAGTAACGCGTAAATAATCTACCCCTGCATCTGGGATAACCCACCGAAAGGTGAGCTAATACCGGATACGTTCTTTTAACCGCGAGGTTTTAAGAAGAAAGGTGGCCTCTGATATAAGCTACTGTGCGGGGAGGAGTTTGCGTACCATTAGCTAGTTGGTAGGGTAATGGCCTACCAAGGCATCGATGGTTAGCGGGTCTGAGAGGATGATCCGCCACACTGGAACTGGAACACGGACCAGACTCCTACGGGAGGCAGCAGTGAGGAATATTGCGCAATGGGGGCAACCCTGACGCAGCGACGCCGCGTGGATGATGAAGGCCTTCGGGTCGTAAAATCCTGTCAGATGGAAAGAAGTGTTATATGGATAATACCTGTATAGCTTGACGGTACCATCAAAGGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCGCGTAGGCGGTCTGTTATGTCAGATGTGAAAGTCCACGGCTCAACCGTGGAAGTGCATTTGAAACTGACAGACTTGAGTACTGGAGGGGGTGGTGGAATTCCCGGTGTAGAGGTGAAATTCGTAGATATCGGGAGGAATACCGGTGGCGAAGGCGACCACCTGGCCAGATACTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCAACTAGGTGTTGGGATGGTTAATCGTCTCATTGCCGGAGCTAACGCATTAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGTATGTGGTTTAATTCGACGCAACGCGCAGAACCTTACCTGGTCTTGACATCCCGAGAATCTCAAGGAAACTTGAGAGTGCCTCTTGAGGAACTCGGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCTTTAGTTGCCATCATTAAGTTGGGCACTCTAAAGAGACTGCCGGTGTCAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCTTTATGACCAGGGCTACACACGTACTACAATGGCATAGACAAAGGGCAGCGACATCGCGAGGTGAAGCGAATCCCATAAACCATGTCTCAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTTGGAATCGCTAGTAATCGTAGATCAGCATGCTACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGGGAGTTGGTTGTACCAGAAGCAGTTGAGCGAACTATTCGTAGACGCAGGCTGCCAAGGTATGATTGGTAACTGGGGTGAAGTCGTAACAAGGTAACC
>11460543
TGGTTTGATCCTGGCTCAGGACAAACGCTGGCGGCGTGCCTAACACATGCAAGTCGAACGAGAAGCCAGCTTTTGATTCCTTCGGGATGAGAAAGCAGGTAGAAAGTGGCGAACGGGTGAGTAACGCGTGGGTAATCTACCCTGTAAGTAGGGGATAACCCTCTGAAAAGAGGGCTAATACCGCATAATATCTTTACCCCATAAGAAGTAAAGATTAAAGATGGCCTCTGTATATGCTATCGCTTCAGGATGAGCCCGCGTCCTATTAGTTAGTTGGTAAGGTAATGGCTTACCAAGACCACGATGGGTAGCCGGTCTGAGAGGATGTACGGCCACACTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCGACGCCGCGTGGATGATGAAGGCCTTCGGGTTGTAAAATCCTGTTTTGGGGGACGAAACCTTAAGGGTCCAATAAACCCTTAAATTGACGGTACCCCAAGAGAAAGCTCCGGCTAATTATGTGCCAGCAGCCGCGGTAATACATAAGGAGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGTTCGTAGGCGGTCTTAAAAGTCAGGTGTGAAATTATCAGGCTTAACCTGATACGGTCATCTGAAACTTTAAGACTTGAGGTTAGGAGAGGAAAGTGGAATTCCCGGTGTAGCGGTGAAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGCCTAACTCTGACGCTGAGGAACGAAAGCTAGGGGAGCAAACGGGATTAGATACCCCGGTAGTCCTAGCTGTAAACGATGGATACTAGGTGTGGGAGGTATCGACCCCTTCTGTGCCGWCACTAACGCATTAAGTATCCCGCCTGGGGAGTACGGTCGCAAGGCTAAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCGGGGCTTGACATTGTCTTGCCCGTTTAAGAAATTAAATTTTCTTCCCTTTTAGGGAAGACAAGATAACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCACAACGAGCGCAACCCTTATTCTTAGTTGCTAGTTTGTTTACAAACGCACTCTAAAGAGACTGCCGCAGATAATGCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTACGTCCCGGGCTACACACGTGCTACAATGGCCTGTACAGAGGGTAGCGAAAGAGCGATCTCAAGCTAATCCCTTAAAACAGGTCTCAGTTCGGATTGGAGGCTGCAACTCGCCTCCATGAAGTCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGAAAGTTGGCGATACCTGAAGTTACTGTGCTAACCCGGCACTCAACTAAGTACATTAAGTCTTATTTTAAGCTATTGTATTTAGTTGAGTGCCGGGAGGCAGGTACCTAAGGTATGGCTAGCGATTGGGGTGAAGTCGTAACAAGGTAGCCG
>11480235
TGGTTTGATCCTGGCTCAGGATTAACGCTGGCGGCGCGCCTTATACATGCAAGTCGAACGAGCCTTGTGCTTCGCACAAGGAAATTCCAAGCACCAAGCACCAAATCTCAAACAAATCCCAATGACCAAAATTCCAAAAACCTAAACATTTTAAATGTTTAGAATTTGGAAAATTGGAATTTGGAATTTATTTGTTATTTGGAATTTATGATTTGGGATTTTCTCGCGCGGAGANCNTNAGTGGCGAACGGGTGAGTAATACGTTGGTATCTACCCCAAAGTAGAGAATAAGCCCGAGAAATCGGGGTTAATACTCTATGTGTTCGAAAGAACAAAGACTTCGGTTGCTTTGGGAAGAACCTGCGGCCTATCAGCTTGTTGGTAAGGTAACGGCTTACCAAGGCTTTGACGGGTAGCTGGTCTGGGAAGACGACCAGCCACAATGGGACTTAGACACGGCCCATACTCCTACGGGAGGCAGCAGTAGGGAATCTTCGGCAATGCCCGAAAGGTGACCGAGCGACGCCGCGTAGAGGAAGAAGATCTTTGGATTGTAAACTCTTTTTCTCCTAGACAAAGTTCTGATTGTATAGGAGGAATAAGGGGTTTCTAAACTCGTGCCAGCAGAAGCGGTAATACGAGTGCCCCAAGCGTTATCCGGAATCATTGGGCGTAGAGCGTTGTATAGGTGGTTTAAAAAGTCCAAAATTAAATCTTTAGGCTCAACCTAAAATCTGTTTTGGAAACTTTTAGACTTGAATAAAATCGACGSGAGTGGAACTTCCAGAGTAGGGGTTACATCCGTTGATACTGGAAGGAACGCCGAAGGCGAAAGCAACTCGCGAGATTTTATTGACGCCGCGTACACGAAAGCGTGGGGAGCGAAAAGTATTAGATACACTTGTAGTCCACGCCGTAAACTATGGATACTAGCAATTTGAAGCTTCGACCCTTCAAGTTGCGGACTAACGCGTTAAGTATCTCGCCTGGGAAGTACGGCCGCAAGGCTAAAACTCAAAGGAATAGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGATAAGCGTGGAACCTTACCAGGGCTTAGACGTACAGAGAATTCCTTGGAAACAAGGAAGTGCTTCGGGAACTCTGTACTCAGGTACTGCATGGCTGTCGTCAGTATGTACTGTGAAGCACTCCCTTAATTGGGGCAACATACGCAACCCCTATCCTAAGTTAGAAATGTCTTAGGAAACCGCTTCGATTCATCGGAGAGGAAGATGGGGACGACGTCAAGTCAGCATGGTCCTTGATGTCCTGGGCGACACACGTGCTACAATGGCTAGTATAACGGGATGCGTAGGTGCGAACCGAAGCTAATCCTTAAAAAACTAGTCTAAGTTCGGATTGAAGTCTGCAACTCGACTTCATGAAGCCGGAATCGCTAGTAACCGCAAATCAGCCACGTTGCGGTGAATACGTTCTCGGGCCTTGTACTCACTGCCCGTCACGTCAAAAAAGTCGGTAATACCCGAAGCACCCTTTTAAAGGGTTCTAAGGTAGGACCGATGATTGGGACGAAGTCGTAACAAGGTAGCCG
>11480408
AATTTAGCGGCCGCGAATTCGCCCTTGAGTTTGATCCTGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGGATATCCGAGCGGAAGGTTTCGGCCGGAAGGTTGGGTATTCGAGTGGCGGACGGGTGAGTAACGCGTGAGCAATCTGTCCCGGACAGGGGGATAACACTTGGAAACAGGTGCTAATACCGCATAAGACCACAGCATCGCATGGTGCAGGGGTAAAAGGAGCGATCCGGTCTGGGGTGAGCTCGCGTCCGATTAGATAGTTGGTGAGGTAACGGCCCACCAAGTCAACGATCGGTAGCCGACCTGAGAGGGTGATCGGCCACATTGGAACTGAGAGACGGTCCAAACTCCTACGGGAGGCAGCAGTGGGGAATATTGGGCAATGGGCGAAAGCCTGACCCAGCAACGCCGCGTGAGTGAAGAAGGCCTTCGGGTTGTAAAGCTCTGTTATGCGAGACGAAGGAAGTGACGGTATCGCATAAGGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCGAGCGTTGTCCGGAATGACTGGGCGTAAAGGGCGTGTAGGCGGCCGTTTAAGTATGGAGTGAAAGTCCATTTTTCAAGGATGGAATTGCTTTGTAGACTGGATGGCTTGAGTGCGGAAGAGGTAAGTGGAATTCCCAGTGTAGCGGTGAAATGCGTAGAGATTGGGAGGAACACCAGTGGCGAAGGCGACTTACTGGGCCGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCGGTAAACGATGAATGCTAGGTGTTGCGGGTATCGACCCCTGCAGTGCCGGAGTAAACACAATAAGCATTCCGCCTGGGGAGTACGGCCGCAAGGTTGAAACTCAAGGGAATTGACGGGGGCCCGCACAAGCAGCGGAGCATGTTGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCAGTTAAGCTCATAGAGATATGAGGTCCCTTCGGGGGAACTGAGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATGGTCAGTTACTAACGCGTGAAGGCGAGGACTCTGACGAGACTGCCGGGGACAACTCGGAGGAAGGTGGGGACGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACAAACGTGCTACAATGGTGACTACAAAGAGGAGCGAGACTGTAAAGTGGAGCGGATCTCAAAAAAGTCATCCCAGTTCGGATTGTGGGCTGCAACCCGCCCACATGAAGTTGGAGTTGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTGGGAGCACCCGAAGTCAGTGAGGTAACCGGAAGGAGCCAGCTGCCGAAGGTGAGACCGATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTAAGGGCGAATTCGTTTAAACCTGCAGGACTAG
"""

test_seq = """>s1 (11472384)
AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCATGCCTTACACATGCAAGTCGAACGGCAGCACGGGGGCAACCCTGGTGGCGAGTGGCGAACGGGTGAGTAATACATCGGAACGTGTCCTGTAGTGGGGGATAGCCCGGCGAAAGCCGGATTAATACCGCATACGCTCTACGGAGGAAAGGGGGGGATCTTAGGACCTCCCGCTACAGGGGCGGCCGATGGCAGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCTGTAGCTGGTCTGAGAGGACGACCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTTGTCCGGAAAGAAAACGCCGTGGTTAATACCCGTGGCGGATGACGGTACCGGAAGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTCCGCTAAGACAGATGTGAAATCCCCGGGCTTAACCTGGGAACTGCATTTGTGACTGGCGGGCTAGAGTATGGCAGAGGGGGGTAGAATTCCACGTGTAGCAGTGAAATGCGTAGAGATGTGGAGGAATACCGATGGCGAAGGCAGCCCCCTGGGCCAATACTGACGCTCATGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGTCAACTAGTTGTCGGGTCTTCATTGACTTGGTAACGTAGCTAACGCGTGAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATTGACGGGGACCCGCACAAGCGGTGGATGATGTGGATTAATTCGATGCAACGCGAAAAACCTTACCTACCCTTGACATGTATGGAATCCTGCTGAGAGGTGGGAGTGCCCGAAAGGGAGCCATAACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCCCTAGTTGCTACGCAAGAGCACTCTAGGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCCTTATGGGTAGGGCTTCACACGTCATACAATGGTCGGAACAGAGGGTCGCCAACCCGCGAGGGGGAGCCAATCCCAGAAAACCGATCGTAGTCCGGATCGCACTCTGCAACTCGAGTGCGTGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTTTACCAGAAGTGGCTAGTCTAACCGCAAGGAGGACGGTCACCACGGTAGGATTCATGACTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTCTCGAGCGAACGTGTCGAACGTTGAGCGCTCACGCTTATCGGCTGTGAAATTAGGACAGTAAGTCAGACAGACTGAGGGGTCTGTAGCTCAGTCGGTTAGAGCACCGTCTTGATAAGGCGGGGGTCGATGGTTCGAATCCATCCAGACCCACCATTGTCT
"""

in_aln1 = """>a1
AAACCTTT----TTTTAAATTCCGAAGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCATGCCTTACACATGCAAGTCGAACGGCAGCACGGGGGCAACCCTGGTGGCGAGTGGCGAACGGGTGAGTAATACATCGGAACGTGTCCTGTAGTGGGGGATAGCCCGGCGAAAGCCGGATTAATACCGCATACGCTCTACGGAGGAAAGGGGGGGATCTTAGGACCTCCCGCTACAGGGGCGGCCGATGGCAGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCTGTAGCTGGTCTGAGAGGACGACCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTC
>a2
AAACCTTT----TTTTAAATTCCGCAGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCATGCCTTACACATGCAAGTCGAACGGCAGCACGGGGGCAACCCTGGTGGCGAGTGGCGAACGGGTGAGTAATACATCGGAACGTGTCCTGTAGTGGGGGATAGCCCGGCGAAAGCCGGATTAATACCGCATACGCTCTACGGAGGAAAGGGGGGGATCTTAGGACCTCCCGCTACAGGGGCGGCCGATGGCAGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCTGTAGCTGGTCTGAGAGGACGACCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTC
>a3
AAACCTTT----TTTTAAATTCCGGAGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCATGCCTTACACATGCAAGTCGAACGGCAGCACGGGGGCAACCCTGGTGGCGAGTGGCGAACGGGTGAGTAATACATCGGAACGTGTCCTGTAGTGGGGGATAGCCCGGCGAAAGCCGGATTAATACCGCATACGCTCTACGGAGGAAAGGGGGGGATCTTAGGACCTCCCGCTACAGGGGCGGCCGATGGCAGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCTGTAGCTGGTCTGAGAGGACGACCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTC
>a4
AAACCTTT----TTTTAAATTCCGTAGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCATGCCTTACACATGCAAGTCGAACGGCAGCACGGGGGCAACCCTGGTGGCGAGTGGCGAACGGGTGAGTAATACATCGGAACGTGTCCTGTAGTGGGGGATAGCCCGGCGAAAGCCGGATTAATACCGCATACGCTCTACGGAGGAAAGGGGGGGATCTTAGGACCTCCCGCTACAGGGGCGGCCGATGGCAGATTAGCTAGTTGGTGGGGTAAAGGCCTACCAAGGCGACGATCTGTAGCTGGTCTGAGAGGACGACCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTC
"""


if __name__ == "__main__":
    main()