#!/usr/bin/env python
from cogent.util.unit_test import TestCase, main
from os.path import join, exists
from os import remove, getcwd
from cogent.app.bwa import BWA, BWA_index, BWA_aln, BWA_samse, \
BWA_sampe, BWA_bwasw, create_bwa_index_from_fasta_file, \
assign_reads_to_database
from cogent.app.util import get_tmp_filename, ApplicationError
# Module metadata: authorship, licensing, version and maintainer contact.
__author__ = "Adam Robbins-Pianka"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Adam Robbins-Pianka", "Daniel McDonald", "Jai Ram Rideout"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Adam Robbins-Pianka"
__email__ = "adam.robbinspianka@colorado.edu"
__status__ = "Production"
class BWAtests(TestCase):
    """Tests for the BWA app controller.

    Most tests build the controllers with HALT_EXEC=True and only check the
    error that is raised or the command string that was generated.
    test_create_bwa_index_from_fasta_file calls the convenience function
    without HALT_EXEC and writes real files (presumably this needs the bwa
    executable to be installed -- verify against the app controller).
    """

    # Class-level fallback kept so the attribute always exists; setUp binds
    # a fresh per-instance list so tests never share mutable state.
    files_to_remove = []

    def setUp(self):
        """Performs setup for the tests.

        Starts every test with its own empty list of files to clean up.
        """
        self.files_to_remove = []

    def tearDown(self):
        """Properly and politely terminates the test.

        Removes the files registered in files_to_remove that still exist.
        """
        for f in self.files_to_remove:
            if exists(f):
                remove(f)

    def test_check_arguments(self):
        """Tests the "check_arguments" method of the BWA base class.

        Arguments passed to certain parameters of the various subcommands can
        take only certain values. The check_arguments function enforces these
        constraints. This function ensures that the rules are being enforced
        as expected.
        """
        # set up test parameters
        # should pass
        index_params_is = {'-a': 'is'}
        # should pass
        index_params_bwtsw = {'-a': 'bwtsw'}
        # should fail, -a must be one of "is" or "bwtsw"
        index_params_invalid = {'-a': 'invalid'}
        # should fail, -p must specify a prefix that is an absolute path
        index_params_invalid_prefix = {'-p': 'invalid'}
        # should pass
        index_params_valid_prefix = {'-p': '/prefix'}

        # instantiate objects built from the above parameters
        index_is = BWA_index(params=index_params_is, HALT_EXEC=True)
        index_bwtsw = BWA_index(params=index_params_bwtsw, HALT_EXEC=True)
        index_invalid = BWA_index(params=index_params_invalid,
                                  HALT_EXEC=True)
        index_invalid_prefix = BWA_index(params=index_params_invalid_prefix,
                                         HALT_EXEC=True)
        index_valid_prefix = BWA_index(params=index_params_valid_prefix,
                                       HALT_EXEC=True)

        # Should not be allowed
        self.assertRaisesRegexp(ApplicationError, "Invalid argument",
                                index_invalid.check_arguments)
        self.assertRaisesRegexp(ApplicationError, "Invalid argument",
                                index_invalid_prefix.check_arguments)

        # Should execute and not raise any exceptions
        index_is.check_arguments()
        index_bwtsw.check_arguments()
        index_valid_prefix.check_arguments()

        # The rest of the _valid_arguments are for checking is_int and
        # is_float and they all use the same function from the base-class,
        # so testing just one of the subcommands should suffice

        # -n must be a float (expressed either as a float or as a string)
        # -o must be an int (expressed either as an int or as a string)
        # pass, both valid
        aln_params_valid = {'-n': 3.0, '-o': 5, '-f': '/sai_out'}
        # fail, second invalid
        aln_params_invalid1 = {'-n': 3.0, '-o': 'nope', '-f': '/sai_out'}
        # fail, first invalid
        aln_params_invalid2 = {'-n': '3.5.1', '-o': 4, '-f': '/sai_out'}
        # fail, did not specify -f
        aln_params_invalid3 = {'-n': 3.0, '-o': 5}

        # instantiate objects
        aln_valid = BWA_aln(params=aln_params_valid, HALT_EXEC=True)
        aln_invalid1 = BWA_aln(params=aln_params_invalid1, HALT_EXEC=True)
        aln_invalid2 = BWA_aln(params=aln_params_invalid2, HALT_EXEC=True)
        aln_invalid3 = BWA_aln(params=aln_params_invalid3, HALT_EXEC=True)

        test_paths = {'prefix': '/fa_in', 'fastq_in': '/fq_in'}

        # Should Halt Exec (AssertionError) right before execution
        self.assertRaisesRegexp(AssertionError, 'Halted exec', aln_valid,
                                test_paths)
        # also need to make sure the base command is correct
        self.assertIn('; bwa aln -f /sai_out -n 3.0 -o 5 /fa_in /fq_in',
                      aln_valid.BaseCommand)

        # Should fail
        self.assertRaisesRegexp(ApplicationError, "Invalid argument",
                                aln_invalid1, test_paths)
        self.assertRaisesRegexp(ApplicationError, "Invalid argument",
                                aln_invalid2, test_paths)
        self.assertRaisesRegexp(ApplicationError,
                                "Please specify an output file",
                                aln_invalid3, test_paths)

    def test_input_as_dict(self):
        """Tests the input handler (_input_as_dict)

        The input handler should throw exceptions if there are not enough
        arguments, or if there are unrecognized arguments, or if a file path
        appears to be a relative filepath.
        """
        # Arguments for BWA_bwasw, which was chosen since it is the only one
        # that also has an optional argument (optional arguments are denoted
        # by a leading underscore)
        missing = {'prefix': '/fa_in', '_query_fasta_2': '/mate'}
        extra = {'prefix': '/fa_in', 'query_fasta': '/query_fasta',
                 'extra': '/param'}
        rel_fp = {'prefix': 'fa_in', 'query_fasta': '/query_fasta'}
        valid = {'prefix': '/fa_in', 'query_fasta': '/query_fasta'}
        valid_with_mate = {'prefix': '/fa_in', 'query_fasta': '/query_fasta',
                           '_query_fasta_2': '/mate'}

        # instantiate the object
        bwasw = BWA_bwasw(params={'-f': '/sam_out'}, HALT_EXEC=True)

        # should raise ApplicationError for wrong I/O files; failure
        self.assertRaisesRegexp(ApplicationError, "Missing required input",
                                bwasw, missing)
        self.assertRaisesRegexp(ApplicationError, "Invalid input arguments",
                                bwasw, extra)
        self.assertRaisesRegexp(ApplicationError, "Only absolute paths",
                                bwasw, rel_fp)

        # should raise AssertionError (Halt Exec); success
        # tests valid arguments with and without the optional
        # _query_fasta_2 argument
        self.assertRaisesRegexp(AssertionError, 'Halted exec', bwasw, valid)
        self.assertRaisesRegexp(AssertionError, 'Halted exec', bwasw,
                                valid_with_mate)

    def test_get_base_command(self):
        """Tests the function that generates the command string.

        Tests whether an object can be instantiated and then called using
        one set of files, and then another set of files.

        Since the structure of the various subclasses is consistent, testing
        that the correct command is generated by one of the subclasses should
        suffice here.
        """
        # instantiate one instance
        aln = BWA_aln(params={'-n': 1.0, '-f': '/sai_out'}, HALT_EXEC=True)

        # set up two different sets of files
        first_files = {'prefix': '/fa_in1', 'fastq_in': '/fq_in1'}
        second_files = {'prefix': '/fa_in2', 'fastq_in': '/fq_in2'}

        # make sure both sets run, and that the command appears to be correct
        self.assertRaisesRegexp(AssertionError, 'Halted exec', aln,
                                first_files)
        self.assertIn('; bwa aln -f /sai_out -n 1.0 /fa_in1 /fq_in1',
                      aln.BaseCommand)

        self.assertRaisesRegexp(AssertionError, 'Halted exec', aln,
                                second_files)
        self.assertIn('; bwa aln -f /sai_out -n 1.0 /fa_in2 /fq_in2',
                      aln.BaseCommand)

        # instantiate another object, to test that there is no cross-talk
        # between instances with the same baseclass
        aln2 = BWA_aln(params={'-n': 2.5, '-o': 7, '-f': '/sai_out'},
                       HALT_EXEC=True)

        self.assertRaisesRegexp(AssertionError, 'Halted exec', aln2,
                                first_files)
        self.assertIn('; bwa aln -f /sai_out -n 2.5 -o 7 /fa_in1 /fq_in1',
                      aln2.BaseCommand)

    def test_get_result_paths(self):
        """Tests the function that retrieves the result paths.

        aln, sampe, samse, bwasw return only one file.
        BWA_index returns 5 files, and the name depends on whether or not the
        -p option is on or not
        """
        index_extensions = ('.amb', '.ann', '.bwt', '.pac', '.sa')

        # instantiate objects
        index = BWA_index(params={}, HALT_EXEC=True)
        index2 = BWA_index(params={'-p': '/prefix'}, HALT_EXEC=True)
        aln = BWA_aln(params={'-f': '/sai_out'}, HALT_EXEC=True)
        samse = BWA_samse(params={'-f': '/sam_out'}, HALT_EXEC=True)
        sampe = BWA_sampe(params={'-f': '/sam_out'}, HALT_EXEC=True)
        # NOTE(review): bwasw is only constructed here; its result paths are
        # never asserted below even though the docstring mentions it.
        bwasw = BWA_bwasw(params={'-f': '/sam_out'}, HALT_EXEC=True)

        # pass in the data, and make sure the output paths are as expected.
        # -p is off here, so the outputs are named after the input fasta
        index_data = {'fasta_in': '/fa_in'}
        results = index._get_result_paths(index_data)
        for ext in index_extensions:
            self.assertEqual(results[ext].Path, '/fa_in' + ext)

        # pass in the data, and make sure the output paths are as expected.
        # -p is on here, so the outputs are named after the prefix
        results = index2._get_result_paths(index_data)
        for ext in index_extensions:
            self.assertEqual(results[ext].Path, '/prefix' + ext)

        # pass in the data, and make sure the output path is as expected
        aln_data = {'prefix': '/fa_in', 'fastq_in': '/fq_in'}
        results = aln._get_result_paths(aln_data)
        self.assertEqual(results['output'].Path, '/sai_out')

        samse_data = {'prefix': '/fa_in', 'sai_in': '/sai_in',
                      'fastq_in': '/fq_in'}
        results = samse._get_result_paths(samse_data)
        self.assertEqual(results['output'].Path, '/sam_out')

        sampe_data = {'prefix': '/fa_in', 'sai1_in': '/sai1_in',
                      'sai2_in': '/sai2_in', 'fastq1_in': '/fq1_in',
                      'fastq2_in': '/fq2_in'}
        results = sampe._get_result_paths(sampe_data)
        self.assertEqual(results['output'].Path, '/sam_out')

    def test_create_bwa_index_from_fasta_file(self):
        """Test create_bwa_index_from_fasta_file

        Makes sure that the file paths are as expected.
        """
        # get a new temp file for the input fasta
        fasta_in = get_tmp_filename(suffix=".fna")

        # register the fasta for removal upon tearDown before writing it, so
        # it is cleaned up even if the write below fails part-way
        self.files_to_remove.append(fasta_in)

        # write the test fasta (see end of this file) to the temp file
        with open(fasta_in, 'w') as fasta:
            fasta.write(test_fasta)

        # run the function
        results = create_bwa_index_from_fasta_file(fasta_in, {})

        # Register every output for removal first, so that a failing
        # assertion below cannot leave index files behind. ExitStatus is
        # skipped because only the other entries are read through .name.
        index_paths = {}
        for filetype, result in results.iteritems():
            if filetype == 'ExitStatus':
                continue
            self.files_to_remove.append(result.name)
            if filetype not in ('StdOut', 'StdErr'):
                index_paths[filetype] = result.name

        # for each of the output files (not counting stdout, stderr, and
        # the exitStatus), make sure the file paths are as expected.
        for filetype, path in index_paths.items():
            self.assertEqual(fasta_in + filetype, path)

    def test_assign_reads_to_database(self):
        """Tests for proper failure in assign_reads_to_database
        """
        # sets of params that should cause failure
        no_alg = {}
        wrong_alg = {'algorithm': 'not_an_algorithm'}
        no_aln_params = {'algorithm': 'bwa-short'}

        # dummy files -- checking for failure as expected, so the function
        # won't get as far as actually running the program
        database = '/db'
        query = '/query'
        out = '/sam'

        self.assertRaisesRegexp(ApplicationError,
                                "Must specify which algorithm",
                                assign_reads_to_database, query, database,
                                out, no_alg)
        self.assertRaisesRegexp(ApplicationError, "Unknown algorithm",
                                assign_reads_to_database, query, database,
                                out, wrong_alg)
        self.assertRaisesRegexp(ApplicationError,
                                "aln is an intermediate step",
                                assign_reads_to_database, query, database,
                                out, no_aln_params)
# Three FASTA records written to a temporary file by
# test_create_bwa_index_from_fasta_file and used as the index input.
test_fasta = '''>NZ_GG770509_647533119
UACUUGGAGUUUGAUCCUGGCUCAGAACGAACGCUGGCGGCAGGCUUAACACAUGCAAGUCGAGCGAGCGGCAGACGGGUGAGUAACGCGUGGGAACGUACCAUUUGCUACGGAAUAACUCAGGGAAACUUGUGCUAAUACCGUAUGUGGAAAGUCGGCAAAUGAUCGGCCCGCGUUGGAUUAGCUAGUUGGUGGGGUAAAGGCUCACCAAGGCGACGAUCCAUAGCUGGUCUGAGAGGAUGAUCAGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGGACAAUGGGCGCAAGCCUGAUCCAGCCAUGCCGCGUGAGUGAUGAAGGCCCUAGGGUUGUAAAGCUCUUUCACCGGUGAAGAUGACGGUAACCGGAGAAGAAGCCCCGGCUAACUUCGUGCCAGCAGCCGCGGUAAUACGAAGGGGGCUAGCGUUGUUCGGAUUUACUGGGCGUAAAGCGCACGUAGGCGGACUUUUAAGUCAGGGGUGAAAUCCCGGGGCUCAACCCCGGAACUGCCUUUGAUACUGGAAGUCUUGAGUAUGGUAGAGGUGAGUGGAAUUCCGAGUGUAGAGGUGAAAUUCGUAGAUAUUCGGAGGAACACCAGUGGCGAAGGCGGCUCACUGGACCAACUGACGCUGAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAAUGUUAGCCGUCGGGGCUUCGGUGGCGCAGCUAACGCAUUAAACAUUCCGCCUGGGGAGUGCGGUCGCAAGAUUAAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGCAGAACCUUACCAGCCCUUGACAUCGACAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGCCCUUAGUUGCCAGCAUGGGCACUCUAAGGGGACUGCCGGUGAUAAGCCGGAGGAAGGUGGGGAUGACGUCAAGUCCUCAUGGCCCUUACGGGCUGGGCUACACACGUGCUACAAUGGUGGUCAGUGGGCAGCGAGCACGCGAGUGUGAGCUAAUCUCCGCCAUCUCAGUUCGGAUGCACUCUGCAACUCGAGUGCAGAAGUUGGAAUCGCUAGUAAUCGCGGAUCAGCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGGGAGUUGGUUUUACCCGAAGGCGCUUGCUAGGCAGGCGACCACGGUAGGGUCAGCGACUGGGGUGAAGUCGUAACAAGGUAGCCGUAGGGGAACCUGCGGCUGGAUCACCUCCUUUCU
>NZ_GG739926_647533195
UAAUGGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCUACAGGCUUAACACAUGCAAGUCGAGGGACCGGCGCACGGGUGAGUAACGCGUAUCCAACCUUCCCGCGACCAAGGGAUAACCUGCCGAAAGGCAGACUAAUACCUUAUGUCCAAAGUCGGUCACGGAUGGGGAUGCGUCCGAUUAGCUUGUUGGCGGGGCAACGGCCCACCAAGGCAUCGAUCGGUAGGGGUUCUGAGAGGAAGGCCCCCCACACUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAUGGGCGGAAGCCUGAACCAGCCAAGUAGCGUGCAGGACGACGGCCUACGGGUUGUAAACUGCUUUUAUGCGGGGAUAUGCAGGUACCGCAUGAAUAAGGACCGGCUAAUUCCGUGCCAGCAGCCGCGGUAAUACGGAAGGUCCGGGCGUUAUCCGGAUUUAUUGGGUUUAAAGGGAGCGCAGGCCGCCGUGCAAGCGUGCCGUGAAAAGCAGCGGCCCAACCGCUGCCCUGCGGCGCGAACUGCUUGGCUUGAGUGCGCCGGAAGCGGGCGGAAUUCGUGGUGUAGCGGUGAAAUGCUUAGAUAUCACGAAGAACCCCGAUUGCGAAGGCAGCCCGCUGUGGCGACUGACGCUGAGGCUCGAAGGUGCGGGUAUCGAACAGGAUUAGAUACCCUGGUAGUCCGCACGGUAAACGAUGGAUACCCGCUGUCCGGCUCUGGGCGGCCAAGCGAAAGCGUUAAGUAUCCCACCUGGGGAGUACGCCGGCAACGGUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGAGGAACAUGUGGUUUAAUUCGAUGAUACGCGAGGAACCUUACCCGGGCUUGAAUUGUGAAGGUGCUGCAUGGUUGUCGUCAGCUCGUGCCGUGAGGUGUCGGCUCAAGUGCCAUAACGAGCGCAACCCCUCUCCGCAGUUGCCAUCGGCCGGGCACUCUGCGGACACUGCCGCCGCAAGGUGGAGGAAGGUGGGGAUGACGUCAAAUCAGCACGGCCCUUACGUCCGGGGCCACACACGUGUUACAAUGGCCGGCAGAGGGCUGUCCGCGCGCAAGUGCGGGUGAAUCCCCUCCGGUCCCAGUUCGGAUGGGGUCUGCAACCCGACCCCAGAAGCUGGAUUCGCUAGUAAUCGCGCAUCAGCCAUGGCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCAAGCCAUGAAAGCCGGGGGUGCCUGAAGUCCGUGUCGGCCUAGGGCAAAACCGGUGAUUGGGGCUAAGUCGUAACAAGGUAGCCGUACCGGAAGGUGCGGCUGGAACACCUCCUUUCU
>NZ_ACIZ01000148_643886127
AAUAUGGAGUUUGAUCCUGGCUCAGGAUGAACGCUGGCGGCGUGCCUAAUACAUGCAAGUCGAACGAGUGGCGGACGGGUGAGUAACACGUGGGUAACCUGCCCUUAAGUGGGGGAUAACAUUUGGAAACAGAUGCUAAUACCGCAUAAAGAAAGUCGCUUUUGGAUGGACCCGCGGCGUAUUAGCUAGUUGGUGAGGUAACGGCUCACCAAGGCAAUGAUACGUAGCCGAACUGAGAGGUUGAUCGGCCACAUUGGGACUGAGACACGGCCCAAACUCCUACGGGAGGCAGCAGUAGGGAAUCUUCCACAAUGGACGCAAGUCUGAUGGAGCAACGCCGCGUGAGUGAAGAAGGCUUUCGGGUCGUAAAACUCUGUUGUUGGAGAAGAUGACGGUAUCCAACCAGAAAGCCACGGCUAACUACGUGCCAGCAGCCGCGGUAAUACGUAGGUGGCAAGCGUUAUCCGGAUUUAUUGGGCGUAAAGCGAGCGCAGGCGGUUUUUUAAGUCUGAUGUGAAAGCCCUCGGCUUAACCGAGGAAGUGCAUCGGAAACUGGGAAACUUGAGUGCAGAAGAGGACAGUGGAACUCCAUGUGUAGCGGUGAAAUGCGUAGAUAUAUGGAAGAACACCAGUGGCGAAGGCGGCUGUCUGGUCUGACUGACGCUGAGGCUCGAAAGCAUGGGUAGCGAACAGGAUUAGAUACCCUGGUAGUCCAUGCCGUAAACGAUGAAUGCUAGGUGUUGGAGCUUCAGUGCCGCAGCUAACGCAUUAAGCAUUCCGCCUGGGGAGUACGACCGCAAGGUUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGUCUUGACAUCGACAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAUGACUAGUUGCCAGCAUGGGCACUCUAGUAAGACUGCCGGUGACAAACCGGAGGAAGGUGGGGAUGACGUCAAAUCAUCAUGCCCCUUAUGACCUGGGCUACACACGUGCUACAAUGGAUGGCAACGAGUUGCGAGACCGCGAGGUCAAGCUAAUCUCUUCCAUUCUCAGUUCGGAUGUAGGCUGCAACUCGCCUACAGAAGUCGGAAUCGCUAGUAAUCGCGGAUCAGCACGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGAGAGUUUGUAACACCCGAAGCCGGUGCGUAGCGAGCCGUCUAAGGUGGGACAAAUGAUUAGGGUGAAGUCGUAACAAGGUAGCCGUAGGAGAACCUGCGGCUGGAUCACCUCCUUUCU'''
# Run the tests through cogent's unit_test main() when executed as a script.
if __name__ == "__main__":
    main()
|