File: test_blat.py

package info (click to toggle)
python-cogent 1.5.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 16,424 kB
  • ctags: 24,343
  • sloc: python: 134,200; makefile: 100; ansic: 17; sh: 10
file content (333 lines) | stat: -rw-r--r-- 26,699 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#!/usr/bin/env python

from cogent.util.unit_test import TestCase, main
from cogent.app.util import get_tmp_filename
from cogent.app.blat import Blat, assign_reads_to_database, \
                assign_dna_reads_to_dna_database, \
                assign_dna_reads_to_protein_database
from os.path import join, exists
from os import remove
from re import search

__author__ = "Adam Robbins-Pianka"
__copyright__ = "Copyright 2007-2012, The QIIME Project"
__credits__ = ["Adam Robbins-Pianka", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Adam Robbins-Pianka"
__email__ = "adam.robbinspianka@colorado.edu"
__status__ = "Prototype"

class BlatTests(TestCase):
    files_to_remove = []

    def setUp(self):
        """Sets up files for testing.
        """
        self.test_db_prot_filename = get_tmp_filename().replace('"', '')
        self.test_db_prot = open(self.test_db_prot_filename, 'w')
        self.test_db_dna_filename = get_tmp_filename().replace('"', '')
        self.test_db_dna = open(self.test_db_dna_filename, 'w')
        self.test_query_filename = get_tmp_filename().replace('"', '')
        self.test_query = open(self.test_query_filename, 'w')

        # write the global variables at the bottom of this file to the
        # temporary test files. Can't use file-like objects because the
        # external application needs actual files.
        self.test_db_prot.write('\n'.join(test_db_prot))
        self.test_db_dna.write('\n'.join(test_db_dna))
        self.test_query.write('\n'.join(test_query))

        # close the files
        self.test_db_prot.close()
        self.test_db_dna.close()
        self.test_query.close()

        # prepare output file path
        self.testout = get_tmp_filename().replace('"', '')

        self.files_to_remove += [self.test_db_prot_filename,
                            self.test_db_dna_filename,
                            self.test_query_filename, self.testout]

    def tearDown(self):
        """Removes temporary files created during the tests
        """
        for filename in self.files_to_remove:
            if exists(filename): remove(filename)

    def test_assign_reads_to_database(self):
        """Tests that assign_reads_to_database works as expected.

        Checks the output file against the expected result when known
        database and query files are used.
        """
        exp = [l for l in assign_reads_exp if not l.startswith('#')]
        obs_lines = assign_reads_to_database(self.test_query_filename,
                                             self.test_db_dna_filename,
                                             self.testout).read().splitlines()
        obs = [l for l in obs_lines if not l.startswith('#')]
        
        self.assertEqual(obs, exp)

    def test_assign_dna_reads_to_dna_database(self):
        """Tests that assign_dna_reads_to_dna_database works as expected.

        Checks the output file against the expected result when known
        database and query files are used.
        """
        exp = [l for l in assign_reads_exp if not l.startswith('#')]

        obs_lines = assign_dna_reads_to_dna_database(self.test_query_filename,
                                             self.test_db_dna_filename,
                                             self.testout).read().splitlines()
        obs = [l for l in obs_lines if not l.startswith('#')]
        
        self.assertEqual(obs, exp)

    def test_assign_dna_reads_to_protein_database(self):
        """Tests that assign_dna_reads_to_protein_database works as expected.

        Checks the output file against the expected result when known
        database and query files are used.
        """
        exp = [l for l in assign_reads_prot_exp if not l.startswith('#')]

        obs_lines = assign_dna_reads_to_protein_database(
                    self.test_query_filename,
                    self.test_db_prot_filename,
                    self.testout).read().splitlines()
        obs = [l for l in obs_lines if not l.startswith('#')]
        
        self.assertEqual(obs, exp)

    def test_get_base_command(self):
        """Tests that _get_base_command generates the proper command given
        various inputs.
        """
        test_parameters_blank = {}
        files = (self.test_query_filename, self.test_db_dna_filename, 
                self.testout)
        exp_blank = 'blat %s %s %s' % (files[1], files[0], files[2])

        # initialize a Blat instance with these parameters and get the
        # command string
        b = Blat(params = {}, HALT_EXEC=True)
        # need to set the positional parameters' values
        b._input_as_list(files)
        cmd = b._get_base_command()

        # find the end of the cd command and trim the base command
        cmd_index = search('cd ".+"; ', cmd).end()
        cmd = cmd[cmd_index:]
        self.assertEqual(cmd, exp_blank)

        test_parameters_1 = {
            '-t': 'dna',
            '-q': 'dna',
            '-ooc': '11.ooc',
            '-tileSize': 1,
            '-stepSize': 2,
            '-oneOff': 1,
            '-minMatch': 2,
            '-minScore': 3,
            '-minIdentity': 4,
            '-maxGap': 5,
            '-makeOoc': 'N.ooc',
            '-repMatch': 6,
            '-mask': 'lower',
            '-qMask': 'lower',
            '-repeats': 'lower',
            '-minRepDivergence': 7,
            '-dots': 8,
            '-out': 'psl',
            '-maxIntron': 9}
        exp_1 = 'blat %s %s ' % (files[1], files[0]) + \
                '-dots=8 -makeOoc="N.ooc" -mask=lower -maxGap=5 ' + \
                '-maxIntron=9 -minIdentity=4 -minMatch=2 ' + \
                '-minRepDivergence=7 -minScore=3 -oneOff=1 -ooc="11.ooc" ' + \
                '-out=psl -q=dna -qMask=lower -repMatch=6 -repeats=lower ' + \
                '-stepSize=2 -t=dna -tileSize=1 %s' % files[2]

        # initialize a Blat instance with these parameters and get the
        # command string
        b = Blat(params = test_parameters_1, HALT_EXEC=True)
        # need to set the positional parameters' values
        b._input_as_list(files)
        cmd = b._get_base_command()

        # find the end of the cd command and trim the base command
        cmd_index = search('cd ".+"; ', cmd).end()
        cmd = cmd[cmd_index:]
        self.assertEqual(cmd, exp_1)

        test_parameters_2 = {
            '-tileSize': 1,
            '-stepSize': 2,
            '-minMatch': 2,
            '-minScore': 3,
            '-minIdentity': 4,
            '-maxGap': 5,
            '-makeOoc': 'N.ooc',
            '-out': 'psl',
            '-maxIntron': 9}
        exp_2 = 'blat %s %s ' % (files[1], files[0]) + \
                '-makeOoc="N.ooc" -maxGap=5 -maxIntron=9 -minIdentity=4 ' + \
                '-minMatch=2 -minScore=3 -out=psl -stepSize=2 ' + \
                '-tileSize=1 %s' % files[2]

        # initialize a Blat instance with these parameters and get the
        # command string
        b = Blat(params = test_parameters_2, HALT_EXEC=True)
        # need to set the positional parameters' values
        b._input_as_list(files)
        cmd = b._get_base_command()

        # find the end of the cd command and trim the base command
        cmd_index = search('cd ".+"; ', cmd).end()
        cmd = cmd[cmd_index:]
        self.assertEqual(cmd, exp_2)

assign_reads_exp = """# BLAT 34 [2006/03/10]
# Query: NZ_GG770509_647533119
# Database: test_db.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG770509_647533119	NZ_GG770509_647533119	100.00	1371	0	0	1	1371	1	1371	0.0e+00	2187.0
NZ_GG770509_647533119	NZ_ACIZ01000148_643886127	85.49	634	92	0	336	969	337	970	4.5e-234	807.0
NZ_GG770509_647533119	NZ_ACIZ01000148_643886127	86.08	237	33	0	1135	1371	1137	1373	1.2e-77	287.0
NZ_GG770509_647533119	NZ_ACIZ01000148_643886127	83.12	154	26	0	976	1129	977	1130	2.2e-48	190.0
NZ_GG770509_647533119	NZ_GG739926_647533195	78.42	329	71	0	656	984	657	985	4.8e-97	351.0
NZ_GG770509_647533119	NZ_GG739926_647533195	89.09	110	11	1	1138	1246	1141	1250	1.1e-30	131.0
NZ_GG770509_647533119	NZ_GG739926_647533195	86.96	69	9	0	1021	1089	1023	1091	3.2e-20	96.0
NZ_GG770509_647533119	NZ_GG739926_647533195	75.26	97	22	2	356	450	356	452	2.3e-13	73.0
NZ_GG770509_647533119	NZ_GG739926_647533195	90.57	53	5	0	1319	1371	1315	1367	2.5e-10	63.0
NZ_GG770509_647533119	NZ_GG739926_647533195	81.82	22	4	0	989	1010	992	1013	1.5e+02	24.0
# BLAT 34 [2006/03/10]
# Query: NZ_GG739926_647533195
# Database: test_db.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG739926_647533195	NZ_GG739926_647533195	100.00	1367	0	0	1	1367	1	1367	0.0e+00	2235.0
NZ_GG739926_647533195	NZ_ACIZ01000148_643886127	76.22	572	136	0	414	985	414	985	1.7e-158	556.0
NZ_GG739926_647533195	NZ_ACIZ01000148_643886127	76.80	181	42	0	1023	1203	1022	1202	6.4e-53	205.0
NZ_GG739926_647533195	NZ_ACIZ01000148_643886127	96.00	50	2	0	1209	1258	1207	1256	6.4e-14	75.0
NZ_GG739926_647533195	NZ_ACIZ01000148_643886127	88.68	53	6	0	1315	1367	1321	1373	1.6e-09	61.0
NZ_GG739926_647533195	NZ_ACIZ01000148_643886127	77.27	22	5	0	992	1013	990	1011	8.5e+02	22.0
NZ_GG739926_647533195	NZ_GG770509_647533119	79.29	280	58	0	657	936	656	935	9.9e-82	301.0
NZ_GG739926_647533195	NZ_GG770509_647533119	89.09	110	11	1	1141	1250	1138	1246	1.1e-30	131.0
NZ_GG739926_647533195	NZ_GG770509_647533119	86.96	69	9	0	1023	1091	1021	1089	3.2e-20	96.0
NZ_GG739926_647533195	NZ_GG770509_647533119	75.26	97	22	2	356	452	356	450	2.3e-13	73.0
NZ_GG739926_647533195	NZ_GG770509_647533119	90.57	53	5	0	1315	1367	1319	1371	2.5e-10	63.0
NZ_GG739926_647533195	NZ_GG770509_647533119	80.00	30	6	0	956	985	955	984	1.2e-03	41.0
NZ_GG739926_647533195	NZ_GG770509_647533119	81.82	22	4	0	992	1013	989	1010	1.5e+02	24.0
# BLAT 34 [2006/03/10]
# Query: NZ_ACIZ01000148_643886127
# Database: test_db.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_ACIZ01000148_643886127	NZ_ACIZ01000148_643886127	100.00	1373	0	0	1	1373	1	1373	0.0e+00	2165.0
NZ_ACIZ01000148_643886127	NZ_GG770509_647533119	85.49	634	92	0	337	970	336	969	4.5e-234	807.0
NZ_ACIZ01000148_643886127	NZ_GG770509_647533119	86.08	237	33	0	1137	1373	1135	1371	1.2e-77	287.0
NZ_ACIZ01000148_643886127	NZ_GG770509_647533119	83.12	154	26	0	977	1130	976	1129	2.2e-48	190.0
NZ_ACIZ01000148_643886127	NZ_GG739926_647533195	76.22	572	136	0	414	985	414	985	1.7e-158	556.0
NZ_ACIZ01000148_643886127	NZ_GG739926_647533195	76.80	181	42	0	1022	1202	1023	1203	6.4e-53	205.0
NZ_ACIZ01000148_643886127	NZ_GG739926_647533195	96.00	50	2	0	1207	1256	1209	1258	6.4e-14	75.0
NZ_ACIZ01000148_643886127	NZ_GG739926_647533195	88.68	53	6	0	1321	1373	1315	1367	1.6e-09	61.0
NZ_ACIZ01000148_643886127	NZ_GG739926_647533195	77.27	22	5	0	990	1011	992	1013	8.5e+02	22.0
""".splitlines()

assign_reads_prot_exp = """# BLAT 34x13 [2009/02/26]
# Query: NZ_GG770509_647533119_frame_1
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG770509_647533119_frame_1	NZ_GG770509_647533119	96.83	441	0	7	1	427	1	441	8.9e-254	872.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG770509_647533119_frame_2
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG770509_647533119_frame_2	NZ_ACIZ01000148_643886127	85.37	41	6	0	359	399	362	402	8.0e-13	72.0
NZ_GG770509_647533119_frame_2	NZ_ACIZ01000148_643886127	93.75	16	1	0	419	434	421	436	1.3e+00	31.0
NZ_GG770509_647533119_frame_2	NZ_GG739926_647533195	75.86	29	7	0	320	348	326	354	2.9e-04	43.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG770509_647533119_frame_3
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG770509_647533119_frame_3	NZ_ACIZ01000148_643886127	80.61	98	19	0	210	307	209	306	7.5e-39	158.0
NZ_GG770509_647533119_frame_3	NZ_ACIZ01000148_643886127	66.33	98	33	0	43	140	44	141	8.9e-27	118.0
NZ_GG770509_647533119_frame_3	NZ_ACIZ01000148_643886127	78.95	38	8	0	310	347	308	345	2.3e-08	57.0
NZ_GG770509_647533119_frame_3	NZ_ACIZ01000148_643886127	66.67	30	10	0	178	207	178	207	2.5e-01	33.0
NZ_GG770509_647533119_frame_3	NZ_GG739926_647533195	53.00	100	47	0	131	230	134	233	1.9e-18	90.0
NZ_GG770509_647533119_frame_3	NZ_GG739926_647533195	68.89	45	14	0	238	282	241	285	5.9e-09	59.0
NZ_GG770509_647533119_frame_3	NZ_GG739926_647533195	72.09	43	12	0	63	105	66	108	3.0e-08	56.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG739926_647533195_frame_1
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG739926_647533195_frame_1	NZ_GG739926_647533195	100.00	437	0	0	1	437	1	437	1.7e-263	904.0
NZ_GG739926_647533195_frame_1	NZ_ACIZ01000148_643886127	69.86	73	22	0	213	285	209	281	1.1e-20	98.0
NZ_GG739926_647533195_frame_1	NZ_ACIZ01000148_643886127	53.33	60	28	0	148	207	145	204	1.3e-06	51.0
NZ_GG739926_647533195_frame_1	NZ_ACIZ01000148_643886127	60.53	38	15	0	66	103	64	101	1.9e-03	41.0
NZ_GG739926_647533195_frame_1	NZ_ACIZ01000148_643886127	76.92	26	6	0	2	27	3	28	9.7e-03	38.0
NZ_GG739926_647533195_frame_1	NZ_ACIZ01000148_643886127	69.57	23	7	0	288	310	285	307	4.8e+00	29.0
NZ_GG739926_647533195_frame_1	NZ_ACIZ01000148_643886127	90.00	10	1	0	134	143	132	141	1.6e+04	18.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG739926_647533195_frame_2
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG739926_647533195_frame_2	NZ_GG770509_647533119	66.67	42	14	0	270	311	276	317	2.3e-08	57.0
NZ_GG739926_647533195_frame_2	NZ_GG770509_647533119	60.00	45	18	0	185	229	188	232	3.9e-06	49.0
NZ_GG739926_647533195_frame_2	NZ_GG770509_647533119	80.00	20	4	0	247	266	251	270	5.6e-01	32.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG739926_647533195_frame_3
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG739926_647533195_frame_3	NZ_ACIZ01000148_643886127	94.44	18	1	0	390	407	385	402	4.3e-03	39.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_ACIZ01000148_643886127_frame_1
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_ACIZ01000148_643886127_frame_1	NZ_ACIZ01000148_643886127	100.00	436	0	0	1	436	1	436	2.1e-261	897.0
NZ_ACIZ01000148_643886127_frame_1	NZ_GG739926_647533195	78.57	42	9	0	240	281	244	285	4.0e-10	63.0
NZ_ACIZ01000148_643886127_frame_1	NZ_GG739926_647533195	60.53	38	15	0	64	101	66	103	1.9e-03	41.0
NZ_ACIZ01000148_643886127_frame_1	NZ_GG739926_647533195	76.92	26	6	0	3	28	2	27	9.7e-03	38.0
NZ_ACIZ01000148_643886127_frame_1	NZ_GG739926_647533195	69.57	23	7	0	285	307	288	310	4.8e+00	29.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_ACIZ01000148_643886127_frame_2
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_ACIZ01000148_643886127_frame_2	NZ_GG770509_647533119	79.59	147	26	2	182	324	189	335	2.3e-61	233.0
NZ_ACIZ01000148_643886127_frame_2	NZ_GG770509_647533119	72.73	33	9	0	128	160	137	169	5.0e-04	42.0
NZ_ACIZ01000148_643886127_frame_2	NZ_GG770509_647533119	90.91	22	2	0	70	91	76	97	2.5e-03	40.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_ACIZ01000148_643886127_frame_3
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_ACIZ01000148_643886127_frame_3	NZ_GG770509_647533119	84.21	38	4	1	360	395	367	404	3.0e-08	56.0
NZ_ACIZ01000148_643886127_frame_3	NZ_GG770509_647533119	94.12	17	1	0	413	429	425	441	1.6e+00	31.0
NZ_ACIZ01000148_643886127_frame_3	NZ_GG739926_647533195	78.57	28	5	1	321	347	326	353	1.5e-03	41.0"""
assign_reads_prot_exp = assign_reads_prot_exp.splitlines()

test_db_prot = """>NZ_GG770509_647533119
YLEFDPGSERTLAAGLTHASRASGRRVSNAWERTICYGITQGNLCYRMetWKVGKSARVGLASWWGKGSPRRRSIAGLRGSATLGLRHGPDSYGRQQWGILDNGRKPDPAMetPRERPGCKALSPVKMetTVTGEEAPANFVPAAAVIRRGLALFGFTGRKAHVGGLLSQGNPGAQPRNCLYWKSVWRVEFRVRNSIFGGTPVAKAAHWTNRGAKAWGANRIRYPGSPRRKRMetLAVGASVAQLTHTFRLGSAVARLKLKGIDGGPHKRWSMetWFNSKQRAEPYQPLTSTGAAWLSSARVVRCWVKSRNERNPRPLPAWALGDCRAGGRWGRQVLMetALTGWATHVLQWWSVGSEHASVSSPPSQFGCTLQLECRSWNRSRISMetPRIRSRALYTPPVTPWELVLPEGACAGDHGRVSDWGEVVTRPGNLRLDHLLS
>NZ_GG739926_647533195
WEFDPGSGTLATGLTHASRGTGARVSNAYPTFPRPRDNLPKGRLIPYVQSRSRMGMRPISLLAGQRPTKASIGRGSERKAPHTGTETRSRLLREAAVRNIGQWAEATSQVACRTTAYGLTAFMRGYAGTAIRTGFRASSRGNTEGPGVIRIYWVRERRPPCKRAVKSSGPTAALRRELLGLSAPEAGGIRGVAVKCLDITKNPDCEGSPLWRLTLRLEGAGIEQDIPWSARTMDTRCPALGGQAKALSIPPGEYAGNGETQRNRGPAQAEEHVVFDDTRGTLPGLELRCCMVVVSSCREVSAQVPRAQPLSAVAIGRALCGHCRRKVEEGGDDVKSARPLRPGPHTCYNGRQRAVRAQVRVNPLRSQFGWGLQPDPRSWIRSRISHGAVNTFPGLVHTARQAMKAGGASPCRPRAKPVIGAKSQGSRTGRCGWNTSF
>NZ_ACIZ01000148_643886127
NMEFDPGSGTLAACLIHASRTSGGRVSNTWVTCPVGDNIWKQMLIPHKESRFWMDPRRISLVRRLTKAMIRSRTERLIGHIGTETRPKLLREAAVGNLPQWTQVWSNAAVKKAFGSNSVVGEDDGIQPESHGLRASSRGNTVASVIRIYWASERRRFFKSDVKALGLTEEVHRKLGNLSAEEDSGTPCVAVKCVDIWKNTSGEGGCLVLTLRLESMGSEQDIPWSMPTMNARCWSFSAAANALSIPPGEYDRKVETQRNRGPAQAVEHVVFEATRRTLPGLDIDRWCMVVVSSCREMLGVPQRAQPLLVASMGTLVRLPVTNRRKVGMTSNHHAPYDLGYTRATMDGNELRDREVKLISSILSSDVGCNSPTEVGIASNRGSARRGEYVPGPCTHRPSHHESLHPKPVRSEPSKVGQMIRVKSQGSRRRTCGWITS"""
test_db_prot = test_db_prot.splitlines()

test_db_dna = """>NZ_GG770509_647533119
UACUUGGAGUUUGAUCCUGGCUCAGAACGAACGCUGGCGGCAGGCUUAACACAUGCAAGUCGAGCGAGCGGCAGACGGGUGAGUAACGCGUGGGAACGUACCAUUUGCUACGGAAUAACUCAGGGAAACUUGUGCUAAUACCGUAUGUGGAAAGUCGGCAAAUGAUCGGCCCGCGUUGGAUUAGCUAGUUGGUGGGGUAAAGGCUCACCAAGGCGACGAUCCAUAGCUGGUCUGAGAGGAUGAUCAGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGGACAAUGGGCGCAAGCCUGAUCCAGCCAUGCCGCGUGAGUGAUGAAGGCCCUAGGGUUGUAAAGCUCUUUCACCGGUGAAGAUGACGGUAACCGGAGAAGAAGCCCCGGCUAACUUCGUGCCAGCAGCCGCGGUAAUACGAAGGGGGCUAGCGUUGUUCGGAUUUACUGGGCGUAAAGCGCACGUAGGCGGACUUUUAAGUCAGGGGUGAAAUCCCGGGGCUCAACCCCGGAACUGCCUUUGAUACUGGAAGUCUUGAGUAUGGUAGAGGUGAGUGGAAUUCCGAGUGUAGAGGUGAAAUUCGUAGAUAUUCGGAGGAACACCAGUGGCGAAGGCGGCUCACUGGACCAACUGACGCUGAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAAUGUUAGCCGUCGGGGCUUCGGUGGCGCAGCUAACGCAUUAAACAUUCCGCCUGGGGAGUGCGGUCGCAAGAUUAAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGCAGAACCUUACCAGCCCUUGACAUCGACAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGCCCUUAGUUGCCAGCAUGGGCACUCUAAGGGGACUGCCGGUGAUAAGCCGGAGGAAGGUGGGGAUGACGUCAAGUCCUCAUGGCCCUUACGGGCUGGGCUACACACGUGCUACAAUGGUGGUCAGUGGGCAGCGAGCACGCGAGUGUGAGCUAAUCUCCGCCAUCUCAGUUCGGAUGCACUCUGCAACUCGAGUGCAGAAGUUGGAAUCGCUAGUAAUCGCGGAUCAGCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGGGAGUUGGUUUUACCCGAAGGCGCUUGCUAGGCAGGCGACCACGGUAGGGUCAGCGACUGGGGUGAAGUCGUAACAAGGUAGCCGUAGGGGAACCUGCGGCUGGAUCACCUCCUUUCU
>NZ_GG739926_647533195
UAAUGGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCUACAGGCUUAACACAUGCAAGUCGAGGGACCGGCGCACGGGUGAGUAACGCGUAUCCAACCUUCCCGCGACCAAGGGAUAACCUGCCGAAAGGCAGACUAAUACCUUAUGUCCAAAGUCGGUCACGGAUGGGGAUGCGUCCGAUUAGCUUGUUGGCGGGGCAACGGCCCACCAAGGCAUCGAUCGGUAGGGGUUCUGAGAGGAAGGCCCCCCACACUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAUGGGCGGAAGCCUGAACCAGCCAAGUAGCGUGCAGGACGACGGCCUACGGGUUGUAAACUGCUUUUAUGCGGGGAUAUGCAGGUACCGCAUGAAUAAGGACCGGCUAAUUCCGUGCCAGCAGCCGCGGUAAUACGGAAGGUCCGGGCGUUAUCCGGAUUUAUUGGGUUUAAAGGGAGCGCAGGCCGCCGUGCAAGCGUGCCGUGAAAAGCAGCGGCCCAACCGCUGCCCUGCGGCGCGAACUGCUUGGCUUGAGUGCGCCGGAAGCGGGCGGAAUUCGUGGUGUAGCGGUGAAAUGCUUAGAUAUCACGAAGAACCCCGAUUGCGAAGGCAGCCCGCUGUGGCGACUGACGCUGAGGCUCGAAGGUGCGGGUAUCGAACAGGAUUAGAUACCCUGGUAGUCCGCACGGUAAACGAUGGAUACCCGCUGUCCGGCUCUGGGCGGCCAAGCGAAAGCGUUAAGUAUCCCACCUGGGGAGUACGCCGGCAACGGUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGAGGAACAUGUGGUUUAAUUCGAUGAUACGCGAGGAACCUUACCCGGGCUUGAAUUGUGAAGGUGCUGCAUGGUUGUCGUCAGCUCGUGCCGUGAGGUGUCGGCUCAAGUGCCAUAACGAGCGCAACCCCUCUCCGCAGUUGCCAUCGGCCGGGCACUCUGCGGACACUGCCGCCGCAAGGUGGAGGAAGGUGGGGAUGACGUCAAAUCAGCACGGCCCUUACGUCCGGGGCCACACACGUGUUACAAUGGCCGGCAGAGGGCUGUCCGCGCGCAAGUGCGGGUGAAUCCCCUCCGGUCCCAGUUCGGAUGGGGUCUGCAACCCGACCCCAGAAGCUGGAUUCGCUAGUAAUCGCGCAUCAGCCAUGGCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCAAGCCAUGAAAGCCGGGGGUGCCUGAAGUCCGUGUCGGCCUAGGGCAAAACCGGUGAUUGGGGCUAAGUCGUAACAAGGUAGCCGUACCGGAAGGUGCGGCUGGAACACCUCCUUUCU
>NZ_ACIZ01000148_643886127
AAUAUGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCGGCGUGCCUAAUACAUGCAAGUCGAACGAGUGGCGGACGGGUGAGUAACACGUGGGUAACCUGCCCUUAAGUGGGGGAUAACAUUUGGAAACAGAUGCUAAUACCGCAUAAAGAAAGUCGCUUUUGGAUGGACCCGCGGCGUAUUAGCUAGUUGGUGAGGUAACGGCUCACCAAGGCAAUGAUACGUAGCCGAACUGAGAGGUUGAUCGGCCACAUUGGGACUGAGACACGGCCCAAACUCCUACGGGAGGCAGCAGUAGGGAAUCUUCCACAAUGGACGCAAGUCUGAUGGAGCAACGCCGCGUGAGUGAAGAAGGCUUUCGGGUCGUAAAACUCUGUUGUUGGAGAAGAUGACGGUAUCCAACCAGAAAGCCACGGCUAACUACGUGCCAGCAGCCGCGGUAAUACGUAGGUGGCAAGCGUUAUCCGGAUUUAUUGGGCGUAAAGCGAGCGCAGGCGGUUUUUUAAGUCUGAUGUGAAAGCCCUCGGCUUAACCGAGGAAGUGCAUCGGAAACUGGGAAACUUGAGUGCAGAAGAGGACAGUGGAACUCCAUGUGUAGCGGUGAAAUGCGUAGAUAUAUGGAAGAACACCAGUGGCGAAGGCGGCUGUCUGGUCUGACUGACGCUGAGGCUCGAAAGCAUGGGUAGCGAACAGGAUUAGAUACCCUGGUAGUCCAUGCCGUAAACGAUGAAUGCUAGGUGUUGGAGCUUCAGUGCCGCAGCUAACGCAUUAAGCAUUCCGCCUGGGGAGUACGACCGCAAGGUUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGUCUUGACAUCGACAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAUGACUAGUUGCCAGCAUGGGCACUCUAGUAAGACUGCCGGUGACAAACCGGAGGAAGGUGGGGAUGACGUCAAAUCAUCAUGCCCCUUAUGACCUGGGCUACACACGUGCUACAAUGGAUGGCAACGAGUUGCGAGACCGCGAGGUCAAGCUAAUCUCUUCCAUUCUCAGUUCGGAUGUAGGCUGCAACUCGCCUACAGAAGUCGGAAUCGCUAGUAAUCGCGGAUCAGCACGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGAGAGUUUGUAACACCCGAAGCCGGUGCGUAGCGAGCCGUCUAAGGUGGGACAAAUGAUUAGGGUGAAGUCGUAACAAGGUAGCCGUAGGAGAACCUGCGGCUGGAUCACCUCCUUUCU"""
test_db_dna = test_db_dna.splitlines()

test_query = """>NZ_GG770509_647533119
UACUUGGAGUUUGAUCCUGGCUCAGAACGAACGCUGGCGGCAGGCUUAACACAUGCAAGUCGAGCGAGCGGCAGACGGGUGAGUAACGCGUGGGAACGUACCAUUUGCUACGGAAUAACUCAGGGAAACUUGUGCUAAUACCGUAUGUGGAAAGUCGGCAAAUGAUCGGCCCGCGUUGGAUUAGCUAGUUGGUGGGGUAAAGGCUCACCAAGGCGACGAUCCAUAGCUGGUCUGAGAGGAUGAUCAGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGGACAAUGGGCGCAAGCCUGAUCCAGCCAUGCCGCGUGAGUGAUGAAGGCCCUAGGGUUGUAAAGCUCUUUCACCGGUGAAGAUGACGGUAACCGGAGAAGAAGCCCCGGCUAACUUCGUGCCAGCAGCCGCGGUAAUACGAAGGGGGCUAGCGUUGUUCGGAUUUACUGGGCGUAAAGCGCACGUAGGCGGACUUUUAAGUCAGGGGUGAAAUCCCGGGGCUCAACCCCGGAACUGCCUUUGAUACUGGAAGUCUUGAGUAUGGUAGAGGUGAGUGGAAUUCCGAGUGUAGAGGUGAAAUUCGUAGAUAUUCGGAGGAACACCAGUGGCGAAGGCGGCUCACUGGACCAACUGACGCUGAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAAUGUUAGCCGUCGGGGCUUCGGUGGCGCAGCUAACGCAUUAAACAUUCCGCCUGGGGAGUGCGGUCGCAAGAUUAAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGCAGAACCUUACCAGCCCUUGACAUCGACAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGCCCUUAGUUGCCAGCAUGGGCACUCUAAGGGGACUGCCGGUGAUAAGCCGGAGGAAGGUGGGGAUGACGUCAAGUCCUCAUGGCCCUUACGGGCUGGGCUACACACGUGCUACAAUGGUGGUCAGUGGGCAGCGAGCACGCGAGUGUGAGCUAAUCUCCGCCAUCUCAGUUCGGAUGCACUCUGCAACUCGAGUGCAGAAGUUGGAAUCGCUAGUAAUCGCGGAUCAGCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGGGAGUUGGUUUUACCCGAAGGCGCUUGCUAGGCAGGCGACCACGGUAGGGUCAGCGACUGGGGUGAAGUCGUAACAAGGUAGCCGUAGGGGAACCUGCGGCUGGAUCACCUCCUUUCU
>NZ_GG739926_647533195
UAAUGGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCUACAGGCUUAACACAUGCAAGUCGAGGGACCGGCGCACGGGUGAGUAACGCGUAUCCAACCUUCCCGCGACCAAGGGAUAACCUGCCGAAAGGCAGACUAAUACCUUAUGUCCAAAGUCGGUCACGGAUGGGGAUGCGUCCGAUUAGCUUGUUGGCGGGGCAACGGCCCACCAAGGCAUCGAUCGGUAGGGGUUCUGAGAGGAAGGCCCCCCACACUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAUGGGCGGAAGCCUGAACCAGCCAAGUAGCGUGCAGGACGACGGCCUACGGGUUGUAAACUGCUUUUAUGCGGGGAUAUGCAGGUACCGCAUGAAUAAGGACCGGCUAAUUCCGUGCCAGCAGCCGCGGUAAUACGGAAGGUCCGGGCGUUAUCCGGAUUUAUUGGGUUUAAAGGGAGCGCAGGCCGCCGUGCAAGCGUGCCGUGAAAAGCAGCGGCCCAACCGCUGCCCUGCGGCGCGAACUGCUUGGCUUGAGUGCGCCGGAAGCGGGCGGAAUUCGUGGUGUAGCGGUGAAAUGCUUAGAUAUCACGAAGAACCCCGAUUGCGAAGGCAGCCCGCUGUGGCGACUGACGCUGAGGCUCGAAGGUGCGGGUAUCGAACAGGAUUAGAUACCCUGGUAGUCCGCACGGUAAACGAUGGAUACCCGCUGUCCGGCUCUGGGCGGCCAAGCGAAAGCGUUAAGUAUCCCACCUGGGGAGUACGCCGGCAACGGUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGAGGAACAUGUGGUUUAAUUCGAUGAUACGCGAGGAACCUUACCCGGGCUUGAAUUGUGAAGGUGCUGCAUGGUUGUCGUCAGCUCGUGCCGUGAGGUGUCGGCUCAAGUGCCAUAACGAGCGCAACCCCUCUCCGCAGUUGCCAUCGGCCGGGCACUCUGCGGACACUGCCGCCGCAAGGUGGAGGAAGGUGGGGAUGACGUCAAAUCAGCACGGCCCUUACGUCCGGGGCCACACACGUGUUACAAUGGCCGGCAGAGGGCUGUCCGCGCGCAAGUGCGGGUGAAUCCCCUCCGGUCCCAGUUCGGAUGGGGUCUGCAACCCGACCCCAGAAGCUGGAUUCGCUAGUAAUCGCGCAUCAGCCAUGGCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCAAGCCAUGAAAGCCGGGGGUGCCUGAAGUCCGUGUCGGCCUAGGGCAAAACCGGUGAUUGGGGCUAAGUCGUAACAAGGUAGCCGUACCGGAAGGUGCGGCUGGAACACCUCCUUUCU
>NZ_ACIZ01000148_643886127
AAUAUGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCGGCGUGCCUAAUACAUGCAAGUCGAACGAGUGGCGGACGGGUGAGUAACACGUGGGUAACCUGCCCUUAAGUGGGGGAUAACAUUUGGAAACAGAUGCUAAUACCGCAUAAAGAAAGUCGCUUUUGGAUGGACCCGCGGCGUAUUAGCUAGUUGGUGAGGUAACGGCUCACCAAGGCAAUGAUACGUAGCCGAACUGAGAGGUUGAUCGGCCACAUUGGGACUGAGACACGGCCCAAACUCCUACGGGAGGCAGCAGUAGGGAAUCUUCCACAAUGGACGCAAGUCUGAUGGAGCAACGCCGCGUGAGUGAAGAAGGCUUUCGGGUCGUAAAACUCUGUUGUUGGAGAAGAUGACGGUAUCCAACCAGAAAGCCACGGCUAACUACGUGCCAGCAGCCGCGGUAAUACGUAGGUGGCAAGCGUUAUCCGGAUUUAUUGGGCGUAAAGCGAGCGCAGGCGGUUUUUUAAGUCUGAUGUGAAAGCCCUCGGCUUAACCGAGGAAGUGCAUCGGAAACUGGGAAACUUGAGUGCAGAAGAGGACAGUGGAACUCCAUGUGUAGCGGUGAAAUGCGUAGAUAUAUGGAAGAACACCAGUGGCGAAGGCGGCUGUCUGGUCUGACUGACGCUGAGGCUCGAAAGCAUGGGUAGCGAACAGGAUUAGAUACCCUGGUAGUCCAUGCCGUAAACGAUGAAUGCUAGGUGUUGGAGCUUCAGUGCCGCAGCUAACGCAUUAAGCAUUCCGCCUGGGGAGUACGACCGCAAGGUUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGUCUUGACAUCGACAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAUGACUAGUUGCCAGCAUGGGCACUCUAGUAAGACUGCCGGUGACAAACCGGAGGAAGGUGGGGAUGACGUCAAAUCAUCAUGCCCCUUAUGACCUGGGCUACACACGUGCUACAAUGGAUGGCAACGAGUUGCGAGACCGCGAGGUCAAGCUAAUCUCUUCCAUUCUCAGUUCGGAUGUAGGCUGCAACUCGCCUACAGAAGUCGGAAUCGCUAGUAAUCGCGGAUCAGCACGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGAGAGUUUGUAACACCCGAAGCCGGUGCGUAGCGAGCCGUCUAAGGUGGGACAAAUGAUUAGGGUGAAGUCGUAACAAGGUAGCCGUAGGAGAACCUGCGGCUGGAUCACCUCCUUUCU"""
test_query = test_query.splitlines()

if __name__ == '__main__':
    main()