1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
|
#!/usr/bin/env python
from cogent.util.unit_test import TestCase, main
from cogent.app.util import get_tmp_filename
from cogent.app.blat import Blat, assign_reads_to_database, \
assign_dna_reads_to_dna_database, \
assign_dna_reads_to_protein_database
from os.path import join, exists
from os import remove
from re import search
# Module metadata: authorship, copyright, licence, version and maintainer
# contact details for this test module.
__author__ = "Adam Robbins-Pianka"
__copyright__ = "Copyright 2007-2012, The QIIME Project"
__credits__ = ["Adam Robbins-Pianka", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Adam Robbins-Pianka"
__email__ = "adam.robbinspianka@colorado.edu"
__status__ = "Prototype"
class BlatTests(TestCase):
    """Tests for Blat and the assign_*_reads_to_*_database functions.

    Every test runs against temporary files that setUp writes from the
    module-level fixtures (test_db_prot, test_db_dna, test_query); tearDown
    removes them again.
    """

    # Class-level default so the attribute always exists; setUp rebinds a
    # fresh per-instance list, so nothing is ever appended to this one.
    files_to_remove = []

    def setUp(self):
        """Write the fixture files and choose an output path for one test.

        Sets self.test_db_prot_filename, self.test_db_dna_filename,
        self.test_query_filename and self.testout, and records all four
        paths in self.files_to_remove for tearDown.
        """
        # Write the global variables at the bottom of this file to
        # temporary files. File-like objects can't be used because the
        # external application needs actual files.
        self.test_db_prot_filename = self._write_fixture(test_db_prot)
        self.test_db_dna_filename = self._write_fixture(test_db_dna)
        self.test_query_filename = self._write_fixture(test_query)

        # prepare output file path
        self.testout = self._tmp_path()

        # Rebind rather than ``+=``: augmented assignment on the class-level
        # list would extend that shared list in place, so the paths of every
        # test would pile up on the class for the whole run.
        self.files_to_remove = [self.test_db_prot_filename,
                                self.test_db_dna_filename,
                                self.test_query_filename,
                                self.testout]

    def tearDown(self):
        """Remove the temporary files recorded by setUp, if they exist."""
        for filename in self.files_to_remove:
            if exists(filename):
                remove(filename)

    @staticmethod
    def _tmp_path():
        """Return a temporary file name with any double quotes removed."""
        return get_tmp_filename().replace('"', '')

    def _write_fixture(self, lines):
        """Write lines, joined by newlines, to a new temp file; return its path."""
        path = self._tmp_path()
        # The context manager closes the handle even if the write fails.
        with open(path, 'w') as handle:
            handle.write('\n'.join(lines))
        return path

    @staticmethod
    def _data_lines(lines):
        """Return the lines that do not start with '#'."""
        return [line for line in lines if not line.startswith('#')]

    def _observed_data_lines(self, assign_func, db_filename):
        """Run assign_func on the query fixture; return its non-'#' lines.

        assign_func is called with (query path, database path, output path)
        and its return value is read like a file.
        """
        result = assign_func(self.test_query_filename, db_filename,
                             self.testout)
        return self._data_lines(result.read().splitlines())

    def _base_command(self, params, files):
        """Return Blat's base command for params/files without the cd prefix.

        Fails the test with a readable message if the command does not
        start with the expected ``cd "...";`` prefix.
        """
        # initialize a Blat instance with these parameters
        app = Blat(params=params, HALT_EXEC=True)
        # need to set the positional parameters' values
        app._input_as_list(files)
        full_cmd = app._get_base_command()
        # find the end of the cd command and trim the base command
        prefix = search('cd ".+"; ', full_cmd)
        if prefix is None:
            self.fail('base command has no leading cd command: %r' % full_cmd)
        return full_cmd[prefix.end():]

    def test_assign_reads_to_database(self):
        """Tests that assign_reads_to_database works as expected.

        Checks the output file against the expected result when known
        database and query files are used.
        """
        exp = self._data_lines(assign_reads_exp)
        obs = self._observed_data_lines(assign_reads_to_database,
                                        self.test_db_dna_filename)
        self.assertEqual(obs, exp)

    def test_assign_dna_reads_to_dna_database(self):
        """Tests that assign_dna_reads_to_dna_database works as expected.

        Checks the output file against the expected result when known
        database and query files are used.
        """
        exp = self._data_lines(assign_reads_exp)
        obs = self._observed_data_lines(assign_dna_reads_to_dna_database,
                                        self.test_db_dna_filename)
        self.assertEqual(obs, exp)

    def test_assign_dna_reads_to_protein_database(self):
        """Tests that assign_dna_reads_to_protein_database works as expected.

        Checks the output file against the expected result when known
        database and query files are used.
        """
        exp = self._data_lines(assign_reads_prot_exp)
        obs = self._observed_data_lines(assign_dna_reads_to_protein_database,
                                        self.test_db_prot_filename)
        self.assertEqual(obs, exp)

    def test_get_base_command(self):
        """Tests that _get_base_command generates the proper command given
        various inputs.
        """
        files = (self.test_query_filename, self.test_db_dna_filename,
                 self.testout)
        query, database, output = files
        # The expected commands list the database before the query, then
        # the options in sorted order, then the output path.
        paths = (database, query, output)

        # no parameters at all
        exp_blank = 'blat %s %s %s' % paths
        self.assertEqual(self._base_command({}, files), exp_blank)

        # every supported parameter
        test_parameters_1 = {
            '-t': 'dna',
            '-q': 'dna',
            '-ooc': '11.ooc',
            '-tileSize': 1,
            '-stepSize': 2,
            '-oneOff': 1,
            '-minMatch': 2,
            '-minScore': 3,
            '-minIdentity': 4,
            '-maxGap': 5,
            '-makeOoc': 'N.ooc',
            '-repMatch': 6,
            '-mask': 'lower',
            '-qMask': 'lower',
            '-repeats': 'lower',
            '-minRepDivergence': 7,
            '-dots': 8,
            '-out': 'psl',
            '-maxIntron': 9,
        }
        exp_1 = ('blat %s %s '
                 '-dots=8 -makeOoc="N.ooc" -mask=lower -maxGap=5 '
                 '-maxIntron=9 -minIdentity=4 -minMatch=2 '
                 '-minRepDivergence=7 -minScore=3 -oneOff=1 -ooc="11.ooc" '
                 '-out=psl -q=dna -qMask=lower -repMatch=6 -repeats=lower '
                 '-stepSize=2 -t=dna -tileSize=1 %s') % paths
        self.assertEqual(self._base_command(test_parameters_1, files), exp_1)

        # a subset of the parameters
        test_parameters_2 = {
            '-tileSize': 1,
            '-stepSize': 2,
            '-minMatch': 2,
            '-minScore': 3,
            '-minIdentity': 4,
            '-maxGap': 5,
            '-makeOoc': 'N.ooc',
            '-out': 'psl',
            '-maxIntron': 9,
        }
        exp_2 = ('blat %s %s '
                 '-makeOoc="N.ooc" -maxGap=5 -maxIntron=9 -minIdentity=4 '
                 '-minMatch=2 -minScore=3 -out=psl -stepSize=2 '
                 '-tileSize=1 %s') % paths
        self.assertEqual(self._base_command(test_parameters_2, files), exp_2)
assign_reads_exp = """# BLAT 34 [2006/03/10]
# Query: NZ_GG770509_647533119
# Database: test_db.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG770509_647533119 NZ_GG770509_647533119 100.00 1371 0 0 1 1371 1 1371 0.0e+00 2187.0
NZ_GG770509_647533119 NZ_ACIZ01000148_643886127 85.49 634 92 0 336 969 337 970 4.5e-234 807.0
NZ_GG770509_647533119 NZ_ACIZ01000148_643886127 86.08 237 33 0 1135 1371 1137 1373 1.2e-77 287.0
NZ_GG770509_647533119 NZ_ACIZ01000148_643886127 83.12 154 26 0 976 1129 977 1130 2.2e-48 190.0
NZ_GG770509_647533119 NZ_GG739926_647533195 78.42 329 71 0 656 984 657 985 4.8e-97 351.0
NZ_GG770509_647533119 NZ_GG739926_647533195 89.09 110 11 1 1138 1246 1141 1250 1.1e-30 131.0
NZ_GG770509_647533119 NZ_GG739926_647533195 86.96 69 9 0 1021 1089 1023 1091 3.2e-20 96.0
NZ_GG770509_647533119 NZ_GG739926_647533195 75.26 97 22 2 356 450 356 452 2.3e-13 73.0
NZ_GG770509_647533119 NZ_GG739926_647533195 90.57 53 5 0 1319 1371 1315 1367 2.5e-10 63.0
NZ_GG770509_647533119 NZ_GG739926_647533195 81.82 22 4 0 989 1010 992 1013 1.5e+02 24.0
# BLAT 34 [2006/03/10]
# Query: NZ_GG739926_647533195
# Database: test_db.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG739926_647533195 NZ_GG739926_647533195 100.00 1367 0 0 1 1367 1 1367 0.0e+00 2235.0
NZ_GG739926_647533195 NZ_ACIZ01000148_643886127 76.22 572 136 0 414 985 414 985 1.7e-158 556.0
NZ_GG739926_647533195 NZ_ACIZ01000148_643886127 76.80 181 42 0 1023 1203 1022 1202 6.4e-53 205.0
NZ_GG739926_647533195 NZ_ACIZ01000148_643886127 96.00 50 2 0 1209 1258 1207 1256 6.4e-14 75.0
NZ_GG739926_647533195 NZ_ACIZ01000148_643886127 88.68 53 6 0 1315 1367 1321 1373 1.6e-09 61.0
NZ_GG739926_647533195 NZ_ACIZ01000148_643886127 77.27 22 5 0 992 1013 990 1011 8.5e+02 22.0
NZ_GG739926_647533195 NZ_GG770509_647533119 79.29 280 58 0 657 936 656 935 9.9e-82 301.0
NZ_GG739926_647533195 NZ_GG770509_647533119 89.09 110 11 1 1141 1250 1138 1246 1.1e-30 131.0
NZ_GG739926_647533195 NZ_GG770509_647533119 86.96 69 9 0 1023 1091 1021 1089 3.2e-20 96.0
NZ_GG739926_647533195 NZ_GG770509_647533119 75.26 97 22 2 356 452 356 450 2.3e-13 73.0
NZ_GG739926_647533195 NZ_GG770509_647533119 90.57 53 5 0 1315 1367 1319 1371 2.5e-10 63.0
NZ_GG739926_647533195 NZ_GG770509_647533119 80.00 30 6 0 956 985 955 984 1.2e-03 41.0
NZ_GG739926_647533195 NZ_GG770509_647533119 81.82 22 4 0 992 1013 989 1010 1.5e+02 24.0
# BLAT 34 [2006/03/10]
# Query: NZ_ACIZ01000148_643886127
# Database: test_db.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_ACIZ01000148_643886127 NZ_ACIZ01000148_643886127 100.00 1373 0 0 1 1373 1 1373 0.0e+00 2165.0
NZ_ACIZ01000148_643886127 NZ_GG770509_647533119 85.49 634 92 0 337 970 336 969 4.5e-234 807.0
NZ_ACIZ01000148_643886127 NZ_GG770509_647533119 86.08 237 33 0 1137 1373 1135 1371 1.2e-77 287.0
NZ_ACIZ01000148_643886127 NZ_GG770509_647533119 83.12 154 26 0 977 1130 976 1129 2.2e-48 190.0
NZ_ACIZ01000148_643886127 NZ_GG739926_647533195 76.22 572 136 0 414 985 414 985 1.7e-158 556.0
NZ_ACIZ01000148_643886127 NZ_GG739926_647533195 76.80 181 42 0 1022 1202 1023 1203 6.4e-53 205.0
NZ_ACIZ01000148_643886127 NZ_GG739926_647533195 96.00 50 2 0 1207 1256 1209 1258 6.4e-14 75.0
NZ_ACIZ01000148_643886127 NZ_GG739926_647533195 88.68 53 6 0 1321 1373 1315 1367 1.6e-09 61.0
NZ_ACIZ01000148_643886127 NZ_GG739926_647533195 77.27 22 5 0 990 1011 992 1013 8.5e+02 22.0
""".splitlines()
assign_reads_prot_exp = """# BLAT 34x13 [2009/02/26]
# Query: NZ_GG770509_647533119_frame_1
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG770509_647533119_frame_1 NZ_GG770509_647533119 96.83 441 0 7 1 427 1 441 8.9e-254 872.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG770509_647533119_frame_2
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG770509_647533119_frame_2 NZ_ACIZ01000148_643886127 85.37 41 6 0 359 399 362 402 8.0e-13 72.0
NZ_GG770509_647533119_frame_2 NZ_ACIZ01000148_643886127 93.75 16 1 0 419 434 421 436 1.3e+00 31.0
NZ_GG770509_647533119_frame_2 NZ_GG739926_647533195 75.86 29 7 0 320 348 326 354 2.9e-04 43.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG770509_647533119_frame_3
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG770509_647533119_frame_3 NZ_ACIZ01000148_643886127 80.61 98 19 0 210 307 209 306 7.5e-39 158.0
NZ_GG770509_647533119_frame_3 NZ_ACIZ01000148_643886127 66.33 98 33 0 43 140 44 141 8.9e-27 118.0
NZ_GG770509_647533119_frame_3 NZ_ACIZ01000148_643886127 78.95 38 8 0 310 347 308 345 2.3e-08 57.0
NZ_GG770509_647533119_frame_3 NZ_ACIZ01000148_643886127 66.67 30 10 0 178 207 178 207 2.5e-01 33.0
NZ_GG770509_647533119_frame_3 NZ_GG739926_647533195 53.00 100 47 0 131 230 134 233 1.9e-18 90.0
NZ_GG770509_647533119_frame_3 NZ_GG739926_647533195 68.89 45 14 0 238 282 241 285 5.9e-09 59.0
NZ_GG770509_647533119_frame_3 NZ_GG739926_647533195 72.09 43 12 0 63 105 66 108 3.0e-08 56.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG739926_647533195_frame_1
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG739926_647533195_frame_1 NZ_GG739926_647533195 100.00 437 0 0 1 437 1 437 1.7e-263 904.0
NZ_GG739926_647533195_frame_1 NZ_ACIZ01000148_643886127 69.86 73 22 0 213 285 209 281 1.1e-20 98.0
NZ_GG739926_647533195_frame_1 NZ_ACIZ01000148_643886127 53.33 60 28 0 148 207 145 204 1.3e-06 51.0
NZ_GG739926_647533195_frame_1 NZ_ACIZ01000148_643886127 60.53 38 15 0 66 103 64 101 1.9e-03 41.0
NZ_GG739926_647533195_frame_1 NZ_ACIZ01000148_643886127 76.92 26 6 0 2 27 3 28 9.7e-03 38.0
NZ_GG739926_647533195_frame_1 NZ_ACIZ01000148_643886127 69.57 23 7 0 288 310 285 307 4.8e+00 29.0
NZ_GG739926_647533195_frame_1 NZ_ACIZ01000148_643886127 90.00 10 1 0 134 143 132 141 1.6e+04 18.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG739926_647533195_frame_2
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG739926_647533195_frame_2 NZ_GG770509_647533119 66.67 42 14 0 270 311 276 317 2.3e-08 57.0
NZ_GG739926_647533195_frame_2 NZ_GG770509_647533119 60.00 45 18 0 185 229 188 232 3.9e-06 49.0
NZ_GG739926_647533195_frame_2 NZ_GG770509_647533119 80.00 20 4 0 247 266 251 270 5.6e-01 32.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_GG739926_647533195_frame_3
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_GG739926_647533195_frame_3 NZ_ACIZ01000148_643886127 94.44 18 1 0 390 407 385 402 4.3e-03 39.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_ACIZ01000148_643886127_frame_1
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_ACIZ01000148_643886127_frame_1 NZ_ACIZ01000148_643886127 100.00 436 0 0 1 436 1 436 2.1e-261 897.0
NZ_ACIZ01000148_643886127_frame_1 NZ_GG739926_647533195 78.57 42 9 0 240 281 244 285 4.0e-10 63.0
NZ_ACIZ01000148_643886127_frame_1 NZ_GG739926_647533195 60.53 38 15 0 64 101 66 103 1.9e-03 41.0
NZ_ACIZ01000148_643886127_frame_1 NZ_GG739926_647533195 76.92 26 6 0 3 28 2 27 9.7e-03 38.0
NZ_ACIZ01000148_643886127_frame_1 NZ_GG739926_647533195 69.57 23 7 0 285 307 288 310 4.8e+00 29.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_ACIZ01000148_643886127_frame_2
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_ACIZ01000148_643886127_frame_2 NZ_GG770509_647533119 79.59 147 26 2 182 324 189 335 2.3e-61 233.0
NZ_ACIZ01000148_643886127_frame_2 NZ_GG770509_647533119 72.73 33 9 0 128 160 137 169 5.0e-04 42.0
NZ_ACIZ01000148_643886127_frame_2 NZ_GG770509_647533119 90.91 22 2 0 70 91 76 97 2.5e-03 40.0
# BLAT 34x13 [2009/02/26]
# Query: NZ_ACIZ01000148_643886127_frame_3
# Database: /home/adro2179/metagenome/test_db_prot.fasta
# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score
NZ_ACIZ01000148_643886127_frame_3 NZ_GG770509_647533119 84.21 38 4 1 360 395 367 404 3.0e-08 56.0
NZ_ACIZ01000148_643886127_frame_3 NZ_GG770509_647533119 94.12 17 1 0 413 429 425 441 1.6e+00 31.0
NZ_ACIZ01000148_643886127_frame_3 NZ_GG739926_647533195 78.57 28 5 1 321 347 326 353 1.5e-03 41.0"""
# Turn the expected-output text into a list of lines; the test drops the
# lines starting with '#' before comparing against the observed output.
assign_reads_prot_exp = assign_reads_prot_exp.splitlines()
test_db_prot = """>NZ_GG770509_647533119
YLEFDPGSERTLAAGLTHASRASGRRVSNAWERTICYGITQGNLCYRMetWKVGKSARVGLASWWGKGSPRRRSIAGLRGSATLGLRHGPDSYGRQQWGILDNGRKPDPAMetPRERPGCKALSPVKMetTVTGEEAPANFVPAAAVIRRGLALFGFTGRKAHVGGLLSQGNPGAQPRNCLYWKSVWRVEFRVRNSIFGGTPVAKAAHWTNRGAKAWGANRIRYPGSPRRKRMetLAVGASVAQLTHTFRLGSAVARLKLKGIDGGPHKRWSMetWFNSKQRAEPYQPLTSTGAAWLSSARVVRCWVKSRNERNPRPLPAWALGDCRAGGRWGRQVLMetALTGWATHVLQWWSVGSEHASVSSPPSQFGCTLQLECRSWNRSRISMetPRIRSRALYTPPVTPWELVLPEGACAGDHGRVSDWGEVVTRPGNLRLDHLLS
>NZ_GG739926_647533195
WEFDPGSGTLATGLTHASRGTGARVSNAYPTFPRPRDNLPKGRLIPYVQSRSRMGMRPISLLAGQRPTKASIGRGSERKAPHTGTETRSRLLREAAVRNIGQWAEATSQVACRTTAYGLTAFMRGYAGTAIRTGFRASSRGNTEGPGVIRIYWVRERRPPCKRAVKSSGPTAALRRELLGLSAPEAGGIRGVAVKCLDITKNPDCEGSPLWRLTLRLEGAGIEQDIPWSARTMDTRCPALGGQAKALSIPPGEYAGNGETQRNRGPAQAEEHVVFDDTRGTLPGLELRCCMVVVSSCREVSAQVPRAQPLSAVAIGRALCGHCRRKVEEGGDDVKSARPLRPGPHTCYNGRQRAVRAQVRVNPLRSQFGWGLQPDPRSWIRSRISHGAVNTFPGLVHTARQAMKAGGASPCRPRAKPVIGAKSQGSRTGRCGWNTSF
>NZ_ACIZ01000148_643886127
NMEFDPGSGTLAACLIHASRTSGGRVSNTWVTCPVGDNIWKQMLIPHKESRFWMDPRRISLVRRLTKAMIRSRTERLIGHIGTETRPKLLREAAVGNLPQWTQVWSNAAVKKAFGSNSVVGEDDGIQPESHGLRASSRGNTVASVIRIYWASERRRFFKSDVKALGLTEEVHRKLGNLSAEEDSGTPCVAVKCVDIWKNTSGEGGCLVLTLRLESMGSEQDIPWSMPTMNARCWSFSAAANALSIPPGEYDRKVETQRNRGPAQAVEHVVFEATRRTLPGLDIDRWCMVVVSSCREMLGVPQRAQPLLVASMGTLVRLPVTNRRKVGMTSNHHAPYDLGYTRATMDGNELRDREVKLISSILSSDVGCNSPTEVGIASNRGSARRGEYVPGPCTHRPSHHESLHPKPVRSEPSKVGQMIRVKSQGSRRRTCGWITS"""
# Keep the protein database fixture as a list of lines; setUp joins it with
# '\n' again when writing it to a temporary file.
test_db_prot = test_db_prot.splitlines()
test_db_dna = """>NZ_GG770509_647533119
UACUUGGAGUUUGAUCCUGGCUCAGAACGAACGCUGGCGGCAGGCUUAACACAUGCAAGUCGAGCGAGCGGCAGACGGGUGAGUAACGCGUGGGAACGUACCAUUUGCUACGGAAUAACUCAGGGAAACUUGUGCUAAUACCGUAUGUGGAAAGUCGGCAAAUGAUCGGCCCGCGUUGGAUUAGCUAGUUGGUGGGGUAAAGGCUCACCAAGGCGACGAUCCAUAGCUGGUCUGAGAGGAUGAUCAGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGGACAAUGGGCGCAAGCCUGAUCCAGCCAUGCCGCGUGAGUGAUGAAGGCCCUAGGGUUGUAAAGCUCUUUCACCGGUGAAGAUGACGGUAACCGGAGAAGAAGCCCCGGCUAACUUCGUGCCAGCAGCCGCGGUAAUACGAAGGGGGCUAGCGUUGUUCGGAUUUACUGGGCGUAAAGCGCACGUAGGCGGACUUUUAAGUCAGGGGUGAAAUCCCGGGGCUCAACCCCGGAACUGCCUUUGAUACUGGAAGUCUUGAGUAUGGUAGAGGUGAGUGGAAUUCCGAGUGUAGAGGUGAAAUUCGUAGAUAUUCGGAGGAACACCAGUGGCGAAGGCGGCUCACUGGACCAACUGACGCUGAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAAUGUUAGCCGUCGGGGCUUCGGUGGCGCAGCUAACGCAUUAAACAUUCCGCCUGGGGAGUGCGGUCGCAAGAUUAAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGCAGAACCUUACCAGCCCUUGACAUCGACAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGCCCUUAGUUGCCAGCAUGGGCACUCUAAGGGGACUGCCGGUGAUAAGCCGGAGGAAGGUGGGGAUGACGUCAAGUCCUCAUGGCCCUUACGGGCUGGGCUACACACGUGCUACAAUGGUGGUCAGUGGGCAGCGAGCACGCGAGUGUGAGCUAAUCUCCGCCAUCUCAGUUCGGAUGCACUCUGCAACUCGAGUGCAGAAGUUGGAAUCGCUAGUAAUCGCGGAUCAGCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGGGAGUUGGUUUUACCCGAAGGCGCUUGCUAGGCAGGCGACCACGGUAGGGUCAGCGACUGGGGUGAAGUCGUAACAAGGUAGCCGUAGGGGAACCUGCGGCUGGAUCACCUCCUUUCU
>NZ_GG739926_647533195
UAAUGGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCUACAGGCUUAACACAUGCAAGUCGAGGGACCGGCGCACGGGUGAGUAACGCGUAUCCAACCUUCCCGCGACCAAGGGAUAACCUGCCGAAAGGCAGACUAAUACCUUAUGUCCAAAGUCGGUCACGGAUGGGGAUGCGUCCGAUUAGCUUGUUGGCGGGGCAACGGCCCACCAAGGCAUCGAUCGGUAGGGGUUCUGAGAGGAAGGCCCCCCACACUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAUGGGCGGAAGCCUGAACCAGCCAAGUAGCGUGCAGGACGACGGCCUACGGGUUGUAAACUGCUUUUAUGCGGGGAUAUGCAGGUACCGCAUGAAUAAGGACCGGCUAAUUCCGUGCCAGCAGCCGCGGUAAUACGGAAGGUCCGGGCGUUAUCCGGAUUUAUUGGGUUUAAAGGGAGCGCAGGCCGCCGUGCAAGCGUGCCGUGAAAAGCAGCGGCCCAACCGCUGCCCUGCGGCGCGAACUGCUUGGCUUGAGUGCGCCGGAAGCGGGCGGAAUUCGUGGUGUAGCGGUGAAAUGCUUAGAUAUCACGAAGAACCCCGAUUGCGAAGGCAGCCCGCUGUGGCGACUGACGCUGAGGCUCGAAGGUGCGGGUAUCGAACAGGAUUAGAUACCCUGGUAGUCCGCACGGUAAACGAUGGAUACCCGCUGUCCGGCUCUGGGCGGCCAAGCGAAAGCGUUAAGUAUCCCACCUGGGGAGUACGCCGGCAACGGUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGAGGAACAUGUGGUUUAAUUCGAUGAUACGCGAGGAACCUUACCCGGGCUUGAAUUGUGAAGGUGCUGCAUGGUUGUCGUCAGCUCGUGCCGUGAGGUGUCGGCUCAAGUGCCAUAACGAGCGCAACCCCUCUCCGCAGUUGCCAUCGGCCGGGCACUCUGCGGACACUGCCGCCGCAAGGUGGAGGAAGGUGGGGAUGACGUCAAAUCAGCACGGCCCUUACGUCCGGGGCCACACACGUGUUACAAUGGCCGGCAGAGGGCUGUCCGCGCGCAAGUGCGGGUGAAUCCCCUCCGGUCCCAGUUCGGAUGGGGUCUGCAACCCGACCCCAGAAGCUGGAUUCGCUAGUAAUCGCGCAUCAGCCAUGGCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCAAGCCAUGAAAGCCGGGGGUGCCUGAAGUCCGUGUCGGCCUAGGGCAAAACCGGUGAUUGGGGCUAAGUCGUAACAAGGUAGCCGUACCGGAAGGUGCGGCUGGAACACCUCCUUUCU
>NZ_ACIZ01000148_643886127
AAUAUGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCGGCGUGCCUAAUACAUGCAAGUCGAACGAGUGGCGGACGGGUGAGUAACACGUGGGUAACCUGCCCUUAAGUGGGGGAUAACAUUUGGAAACAGAUGCUAAUACCGCAUAAAGAAAGUCGCUUUUGGAUGGACCCGCGGCGUAUUAGCUAGUUGGUGAGGUAACGGCUCACCAAGGCAAUGAUACGUAGCCGAACUGAGAGGUUGAUCGGCCACAUUGGGACUGAGACACGGCCCAAACUCCUACGGGAGGCAGCAGUAGGGAAUCUUCCACAAUGGACGCAAGUCUGAUGGAGCAACGCCGCGUGAGUGAAGAAGGCUUUCGGGUCGUAAAACUCUGUUGUUGGAGAAGAUGACGGUAUCCAACCAGAAAGCCACGGCUAACUACGUGCCAGCAGCCGCGGUAAUACGUAGGUGGCAAGCGUUAUCCGGAUUUAUUGGGCGUAAAGCGAGCGCAGGCGGUUUUUUAAGUCUGAUGUGAAAGCCCUCGGCUUAACCGAGGAAGUGCAUCGGAAACUGGGAAACUUGAGUGCAGAAGAGGACAGUGGAACUCCAUGUGUAGCGGUGAAAUGCGUAGAUAUAUGGAAGAACACCAGUGGCGAAGGCGGCUGUCUGGUCUGACUGACGCUGAGGCUCGAAAGCAUGGGUAGCGAACAGGAUUAGAUACCCUGGUAGUCCAUGCCGUAAACGAUGAAUGCUAGGUGUUGGAGCUUCAGUGCCGCAGCUAACGCAUUAAGCAUUCCGCCUGGGGAGUACGACCGCAAGGUUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGUCUUGACAUCGACAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAUGACUAGUUGCCAGCAUGGGCACUCUAGUAAGACUGCCGGUGACAAACCGGAGGAAGGUGGGGAUGACGUCAAAUCAUCAUGCCCCUUAUGACCUGGGCUACACACGUGCUACAAUGGAUGGCAACGAGUUGCGAGACCGCGAGGUCAAGCUAAUCUCUUCCAUUCUCAGUUCGGAUGUAGGCUGCAACUCGCCUACAGAAGUCGGAAUCGCUAGUAAUCGCGGAUCAGCACGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGAGAGUUUGUAACACCCGAAGCCGGUGCGUAGCGAGCCGUCUAAGGUGGGACAAAUGAUUAGGGUGAAGUCGUAACAAGGUAGCCGUAGGAGAACCUGCGGCUGGAUCACCUCCUUUCU"""
# Keep the nucleotide database fixture as a list of lines; setUp joins it
# with '\n' again when writing it to a temporary file.
test_db_dna = test_db_dna.splitlines()
test_query = """>NZ_GG770509_647533119
UACUUGGAGUUUGAUCCUGGCUCAGAACGAACGCUGGCGGCAGGCUUAACACAUGCAAGUCGAGCGAGCGGCAGACGGGUGAGUAACGCGUGGGAACGUACCAUUUGCUACGGAAUAACUCAGGGAAACUUGUGCUAAUACCGUAUGUGGAAAGUCGGCAAAUGAUCGGCCCGCGUUGGAUUAGCUAGUUGGUGGGGUAAAGGCUCACCAAGGCGACGAUCCAUAGCUGGUCUGAGAGGAUGAUCAGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGGACAAUGGGCGCAAGCCUGAUCCAGCCAUGCCGCGUGAGUGAUGAAGGCCCUAGGGUUGUAAAGCUCUUUCACCGGUGAAGAUGACGGUAACCGGAGAAGAAGCCCCGGCUAACUUCGUGCCAGCAGCCGCGGUAAUACGAAGGGGGCUAGCGUUGUUCGGAUUUACUGGGCGUAAAGCGCACGUAGGCGGACUUUUAAGUCAGGGGUGAAAUCCCGGGGCUCAACCCCGGAACUGCCUUUGAUACUGGAAGUCUUGAGUAUGGUAGAGGUGAGUGGAAUUCCGAGUGUAGAGGUGAAAUUCGUAGAUAUUCGGAGGAACACCAGUGGCGAAGGCGGCUCACUGGACCAACUGACGCUGAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAAUGUUAGCCGUCGGGGCUUCGGUGGCGCAGCUAACGCAUUAAACAUUCCGCCUGGGGAGUGCGGUCGCAAGAUUAAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGCAGAACCUUACCAGCCCUUGACAUCGACAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGCCCUUAGUUGCCAGCAUGGGCACUCUAAGGGGACUGCCGGUGAUAAGCCGGAGGAAGGUGGGGAUGACGUCAAGUCCUCAUGGCCCUUACGGGCUGGGCUACACACGUGCUACAAUGGUGGUCAGUGGGCAGCGAGCACGCGAGUGUGAGCUAAUCUCCGCCAUCUCAGUUCGGAUGCACUCUGCAACUCGAGUGCAGAAGUUGGAAUCGCUAGUAAUCGCGGAUCAGCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGGGAGUUGGUUUUACCCGAAGGCGCUUGCUAGGCAGGCGACCACGGUAGGGUCAGCGACUGGGGUGAAGUCGUAACAAGGUAGCCGUAGGGGAACCUGCGGCUGGAUCACCUCCUUUCU
>NZ_GG739926_647533195
UAAUGGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCUACAGGCUUAACACAUGCAAGUCGAGGGACCGGCGCACGGGUGAGUAACGCGUAUCCAACCUUCCCGCGACCAAGGGAUAACCUGCCGAAAGGCAGACUAAUACCUUAUGUCCAAAGUCGGUCACGGAUGGGGAUGCGUCCGAUUAGCUUGUUGGCGGGGCAACGGCCCACCAAGGCAUCGAUCGGUAGGGGUUCUGAGAGGAAGGCCCCCCACACUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAUGGGCGGAAGCCUGAACCAGCCAAGUAGCGUGCAGGACGACGGCCUACGGGUUGUAAACUGCUUUUAUGCGGGGAUAUGCAGGUACCGCAUGAAUAAGGACCGGCUAAUUCCGUGCCAGCAGCCGCGGUAAUACGGAAGGUCCGGGCGUUAUCCGGAUUUAUUGGGUUUAAAGGGAGCGCAGGCCGCCGUGCAAGCGUGCCGUGAAAAGCAGCGGCCCAACCGCUGCCCUGCGGCGCGAACUGCUUGGCUUGAGUGCGCCGGAAGCGGGCGGAAUUCGUGGUGUAGCGGUGAAAUGCUUAGAUAUCACGAAGAACCCCGAUUGCGAAGGCAGCCCGCUGUGGCGACUGACGCUGAGGCUCGAAGGUGCGGGUAUCGAACAGGAUUAGAUACCCUGGUAGUCCGCACGGUAAACGAUGGAUACCCGCUGUCCGGCUCUGGGCGGCCAAGCGAAAGCGUUAAGUAUCCCACCUGGGGAGUACGCCGGCAACGGUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGAGGAACAUGUGGUUUAAUUCGAUGAUACGCGAGGAACCUUACCCGGGCUUGAAUUGUGAAGGUGCUGCAUGGUUGUCGUCAGCUCGUGCCGUGAGGUGUCGGCUCAAGUGCCAUAACGAGCGCAACCCCUCUCCGCAGUUGCCAUCGGCCGGGCACUCUGCGGACACUGCCGCCGCAAGGUGGAGGAAGGUGGGGAUGACGUCAAAUCAGCACGGCCCUUACGUCCGGGGCCACACACGUGUUACAAUGGCCGGCAGAGGGCUGUCCGCGCGCAAGUGCGGGUGAAUCCCCUCCGGUCCCAGUUCGGAUGGGGUCUGCAACCCGACCCCAGAAGCUGGAUUCGCUAGUAAUCGCGCAUCAGCCAUGGCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCAAGCCAUGAAAGCCGGGGGUGCCUGAAGUCCGUGUCGGCCUAGGGCAAAACCGGUGAUUGGGGCUAAGUCGUAACAAGGUAGCCGUACCGGAAGGUGCGGCUGGAACACCUCCUUUCU
>NZ_ACIZ01000148_643886127
AAUAUGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCGGCGUGCCUAAUACAUGCAAGUCGAACGAGUGGCGGACGGGUGAGUAACACGUGGGUAACCUGCCCUUAAGUGGGGGAUAACAUUUGGAAACAGAUGCUAAUACCGCAUAAAGAAAGUCGCUUUUGGAUGGACCCGCGGCGUAUUAGCUAGUUGGUGAGGUAACGGCUCACCAAGGCAAUGAUACGUAGCCGAACUGAGAGGUUGAUCGGCCACAUUGGGACUGAGACACGGCCCAAACUCCUACGGGAGGCAGCAGUAGGGAAUCUUCCACAAUGGACGCAAGUCUGAUGGAGCAACGCCGCGUGAGUGAAGAAGGCUUUCGGGUCGUAAAACUCUGUUGUUGGAGAAGAUGACGGUAUCCAACCAGAAAGCCACGGCUAACUACGUGCCAGCAGCCGCGGUAAUACGUAGGUGGCAAGCGUUAUCCGGAUUUAUUGGGCGUAAAGCGAGCGCAGGCGGUUUUUUAAGUCUGAUGUGAAAGCCCUCGGCUUAACCGAGGAAGUGCAUCGGAAACUGGGAAACUUGAGUGCAGAAGAGGACAGUGGAACUCCAUGUGUAGCGGUGAAAUGCGUAGAUAUAUGGAAGAACACCAGUGGCGAAGGCGGCUGUCUGGUCUGACUGACGCUGAGGCUCGAAAGCAUGGGUAGCGAACAGGAUUAGAUACCCUGGUAGUCCAUGCCGUAAACGAUGAAUGCUAGGUGUUGGAGCUUCAGUGCCGCAGCUAACGCAUUAAGCAUUCCGCCUGGGGAGUACGACCGCAAGGUUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGUCUUGACAUCGACAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAUGACUAGUUGCCAGCAUGGGCACUCUAGUAAGACUGCCGGUGACAAACCGGAGGAAGGUGGGGAUGACGUCAAAUCAUCAUGCCCCUUAUGACCUGGGCUACACACGUGCUACAAUGGAUGGCAACGAGUUGCGAGACCGCGAGGUCAAGCUAAUCUCUUCCAUUCUCAGUUCGGAUGUAGGCUGCAACUCGCCUACAGAAGUCGGAAUCGCUAGUAAUCGCGGAUCAGCACGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGAGAGUUUGUAACACCCGAAGCCGGUGCGUAGCGAGCCGUCUAAGGUGGGACAAAUGAUUAGGGUGAAGUCGUAACAAGGUAGCCGUAGGAGAACCUGCGGCUGGAUCACCUCCUUUCU"""
# Keep the query fixture as a list of lines; setUp joins it with '\n' again
# when writing it to a temporary file.
test_query = test_query.splitlines()
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    main()
|