File: test_align_seqs.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (424 lines) | stat: -rw-r--r-- 14,981 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
#!/usr/bin/env python

"""Tests of code for aligning 16S sequences"""

__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME Project" 
__credits__ = ["Greg Caporaso","Jeremy Widmann"] 
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Release"

from os import remove
from os.path import getsize
from cogent import LoadSeqs, DNA
from cogent.core.alignment import DenseAlignment, Alignment
from cogent.util.unit_test import TestCase, main
from qiime.util import get_tmp_filename
from qiime.align_seqs import (
    Aligner, CogentAligner, PyNastAligner, InfernalAligner,
    alignment_module_names,
    )

def remove_files(list_of_filepaths,error_on_missing=True):
    """Delete each path in list_of_filepaths, optionally failing on misses.

    list_of_filepaths: iterable of filesystem paths to delete.
    error_on_missing: when True (the default), raise OSError once all
     deletions have been attempted if any path could not be removed.
     When False, paths that cannot be removed are silently skipped.

    Every path is attempted before an error is raised, so one bad path
    does not prevent the remaining files from being deleted.
    """
    missing = []
    for fp in list_of_filepaths:
        try:
            remove(fp)
        except OSError:
            missing.append(fp)

    if error_on_missing and missing:
        # Call form of raise: equivalent to ``raise OSError, msg`` on
        # Python 2 and, unlike that statement form, also valid on Python 3.
        raise OSError(
         "Some filepaths were not accessible: %s" % '\t'.join(missing))

class AlignerTests(TestCase):
    """Unit tests for the abstract Aligner base class"""

    def test_init(self):
        """Abstract Aligner __init__ stores its name and params"""
        base_aligner = Aligner({})
        observed_name = base_aligner.Name
        observed_params = base_aligner.Params
        self.assertEqual(observed_name, 'Aligner')
        self.assertEqual(observed_params, {})

    def test_call(self):
        """Abstract Aligner __call__ raises NotImplementedError"""
        base_aligner = Aligner({})
        # assertRaises invokes base_aligner('/path/to/seqs') itself
        self.assertRaises(
            NotImplementedError, base_aligner, '/path/to/seqs')

class SharedSetupTestCase(TestCase):
    """Common base for the aligner test cases: provides temp-file cleanup

    Only tearDown is defined here; it reads self._paths_to_clean_up,
    which this class does not set itself.
    """

    def tearDown(self):
        """Remove every path listed in self._paths_to_clean_up"""
        paths = self._paths_to_clean_up
        remove_files(paths)
 
class CogentAlignerTests(SharedSetupTestCase):
    """Tests of the CogentAligner class"""

    def setUp(self):
        """Write seqs_for_muscle to a temp fasta file; look up muscle module"""
        self.input_fp = get_tmp_filename(
            prefix='CogentAlignerTests_',suffix='.fasta')
        # Close the handle explicitly so the sequences are flushed to disk
        # before any aligner reads the file.
        with open(self.input_fp,'w') as input_f:
            input_f.write(seqs_for_muscle)

        self._paths_to_clean_up = [self.input_fp]
        self.muscle_module = alignment_module_names['muscle']

    def _align_input(self, aligner):
        """Run aligner on self.input_fp with a fresh temp log; return result

        The log path is registered in self._paths_to_clean_up before the
        aligner is called so that tearDown attempts to remove it.
        """
        log_fp = get_tmp_filename(
            prefix='CogentAlignerTests_',suffix='.log')
        self._paths_to_clean_up.append(log_fp)
        # result_path=None: the result is returned rather than written to
        # a result file, and the tests compare the returned value.
        return aligner(result_path=None, seq_path=self.input_fp,
            log_path=log_fp)

    def test_call_correct_alignment(self):
        """CogentAligner: output expected alignment file"""
        p = CogentAligner({'Module': self.muscle_module})

        actual = self._align_input(p)
        expected = expected_muscle_alignment
        # note: record order differs between versions, so the lines are
        # compared as unordered collections
        self.assertEqualItems(str(actual).splitlines(),expected.splitlines())

    def test_muscle_max_memory(self):
        """CogentAligner: muscle_max_memory should be passed to alignment fcn"""
        p = CogentAligner({
            'Module': self.muscle_module,
            '-maxmb': '200',
            })
        self.assertEqual(p.Params["-maxmb"], "200")

        actual = self._align_input(p)
        expected = expected_muscle_alignment
        # note: record order differs between versions, so the lines are
        # compared as unordered collections
        self.assertEqualItems(str(actual).splitlines(),expected.splitlines())
        
 
class InfernalAlignerTests(SharedSetupTestCase):
    """Tests of the InfernalAligner class"""

    def setUp(self):
        """Create input, template, result and log temp files and the aligner

        All four paths are recorded in self._paths_to_clean_up for tearDown.
        """
        self.infernal_test1_input_fp = get_tmp_filename(
            prefix='InfernalAlignerTests_',suffix='.fasta')
        # 'with' closes (and therefore flushes) each file before it is read
        with open(self.infernal_test1_input_fp,'w') as input_f:
            input_f.write(infernal_test1_input_fasta)

        self.infernal_test1_template_fp = get_tmp_filename(
            prefix='InfernalAlignerTests_',suffix='template.sto')
        with open(self.infernal_test1_template_fp,'w') as template_f:
            template_f.write(infernal_test1_template_stockholm)

        # create temp file names (and touch them so we can reliably
        # clean them up)
        self.result_fp = get_tmp_filename(
            prefix='InfernalAlignerTests_',suffix='.fasta')
        open(self.result_fp,'w').close()

        self.log_fp = get_tmp_filename(
            prefix='InfernalAlignerTests_',suffix='.log')
        open(self.log_fp,'w').close()

        self._paths_to_clean_up = [
            self.infernal_test1_input_fp,
            self.result_fp,
            self.log_fp,
            self.infernal_test1_template_fp,
            ]

        self.infernal_test1_aligner = InfernalAligner({
                'template_filepath': self.infernal_test1_template_fp,
                })
        self.infernal_test1_expected_aln = \
         LoadSeqs(data=infernal_test1_expected_alignment,aligned=Alignment,\
            moltype=DNA)

    def test_call_infernal_test1_file_output(self):
        """InfernalAligner writes correct output files for infernal_test1 seqs
        """
        # do not collect results; check output files instead
        actual = self.infernal_test1_aligner(\
         self.infernal_test1_input_fp, result_path=self.result_fp,
         log_path=self.log_fp)

        # identity check: the result must be None itself, not merely
        # something that compares equal to None
        self.assertTrue(actual is None,\
         "Result should be None when result path provided.")

        expected_aln = self.infernal_test1_expected_aln
        actual_aln = LoadSeqs(self.result_fp,aligned=Alignment)
        self.assertEqual(actual_aln,expected_aln)

    def test_call_infernal_test1(self):
        """InfernalAligner: functions as expected when returning objects
        """
        actual_aln = self.infernal_test1_aligner(self.infernal_test1_input_fp)
        expected_aln = self.infernal_test1_expected_aln

        expected_names = ['seq_1', 'seq_2', 'seq_3']
        # names are sorted before comparison, so their order in the
        # returned alignment does not matter here
        self.assertEqual(sorted(actual_aln.Names), expected_names)
        self.assertEqual(actual_aln, expected_aln)


class PyNastAlignerTests(SharedSetupTestCase):
    """Tests of the PyNastAligner class"""

    def setUp(self):
        """Create temp input/template/output files and the default aligner

        Four template files are written: the template as-is, and variants
        with '-' replaced by '.', 'T' replaced by 'U', and all lower case.
        Every path created is recorded in self._paths_to_clean_up.
        """
        # 'with' closes (and therefore flushes) each file before it is read
        self.pynast_test1_input_fp = get_tmp_filename(
            prefix='PyNastAlignerTests_',suffix='.fasta')
        with open(self.pynast_test1_input_fp,'w') as input_f:
            input_f.write(pynast_test1_input_fasta)

        self.pynast_test1_template_fp = get_tmp_filename(
            prefix='PyNastAlignerTests_',suffix='template.fasta')
        with open(self.pynast_test1_template_fp,'w') as template_f:
            template_f.write(pynast_test1_template_fasta)

        self.pynast_test_template_w_dots_fp = get_tmp_filename(
            prefix='PyNastAlignerTests_',suffix='template.fasta')
        with open(self.pynast_test_template_w_dots_fp,'w') as template_f:
            template_f.write(pynast_test1_template_fasta.replace('-','.'))

        self.pynast_test_template_w_u_fp = get_tmp_filename(
            prefix='PyNastAlignerTests_',suffix='template.fasta')
        with open(self.pynast_test_template_w_u_fp,'w') as template_f:
            template_f.write(pynast_test1_template_fasta.replace('T','U'))

        self.pynast_test_template_w_lower_fp = get_tmp_filename(
            prefix='PyNastAlignerTests_',suffix='template.fasta')
        with open(self.pynast_test_template_w_lower_fp,'w') as template_f:
            template_f.write(pynast_test1_template_fasta.lower())

        # create temp file names (and touch them so we can reliably
        # clean them up)
        self.result_fp = get_tmp_filename(
            prefix='PyNastAlignerTests_',suffix='.fasta')
        open(self.result_fp,'w').close()
        self.failure_fp = get_tmp_filename(
            prefix='PyNastAlignerTests_',suffix='.fasta')
        open(self.failure_fp,'w').close()
        self.log_fp = get_tmp_filename(
            prefix='PyNastAlignerTests_',suffix='.log')
        open(self.log_fp,'w').close()

        self._paths_to_clean_up = [
            self.pynast_test1_input_fp,
            self.result_fp,
            self.failure_fp,
            self.log_fp,
            self.pynast_test1_template_fp,
            self.pynast_test_template_w_dots_fp,
            self.pynast_test_template_w_u_fp,
            self.pynast_test_template_w_lower_fp
            ]

        self.pynast_test1_aligner = PyNastAligner({
                'template_filepath': self.pynast_test1_template_fp,
                'min_len': 15,
                })

        self.pynast_test1_expected_aln = \
         LoadSeqs(data=pynast_test1_expected_alignment,aligned=DenseAlignment)
        self.pynast_test1_expected_fail = \
         LoadSeqs(data=pynast_test1_expected_failure,aligned=False)

    def test_call_pynast_test1_file_output(self):
        """PyNastAligner writes correct output files for pynast_test1 seqs
        """
        # do not collect results; check output files instead
        actual = self.pynast_test1_aligner(\
         self.pynast_test1_input_fp, result_path=self.result_fp,
         log_path=self.log_fp, failure_path=self.failure_fp)

        # identity check: the result must be None itself, not merely
        # something that compares equal to None
        self.assertTrue(actual is None,\
         "Result should be None when result path provided.")

        expected_aln = self.pynast_test1_expected_aln
        actual_aln = LoadSeqs(self.result_fp,aligned=DenseAlignment)
        self.assertEqual(actual_aln,expected_aln)

        # failures are compared via their fasta text
        actual_fail = LoadSeqs(self.failure_fp,aligned=False)
        self.assertEqual(actual_fail.toFasta(),\
                         self.pynast_test1_expected_fail.toFasta())


    def test_call_pynast_test1_file_output_alt_params(self):
        """PyNastAligner writes correct output files when no seqs align
        """
        aligner = PyNastAligner({
                'template_filepath': self.pynast_test1_template_fp,
                'min_len':1000})

        actual = aligner(\
         self.pynast_test1_input_fp, result_path=self.result_fp,
         log_path=self.log_fp, failure_path=self.failure_fp)

        self.assertTrue(actual is None,\
         "Result should be None when result path provided.")

        self.assertEqual(getsize(self.result_fp),0,\
         "No alignable seqs should result in an empty file.")

        # all seqs reported to fail
        actual_fail = LoadSeqs(self.failure_fp,aligned=False)
        self.assertEqual(actual_fail.getNumSeqs(),3)

    def test_call_pynast_test1(self):
        """PyNastAligner: functions as expected when returning objects
        """
        actual_aln = self.pynast_test1_aligner(self.pynast_test1_input_fp)
        expected_aln = self.pynast_test1_expected_aln

        expected_names = ['1 description field 1..23', '2 1..23']
        self.assertEqual(actual_aln.Names, expected_names)
        self.assertEqual(actual_aln, expected_aln)

    def test_call_pynast_template_aln_with_dots(self):
        """PyNastAligner: functions when template alignment contains dots
        """
        pynast_aligner = PyNastAligner({
                'template_filepath': self.pynast_test_template_w_dots_fp,
                'min_len': 15,
                })
        actual_aln = pynast_aligner(self.pynast_test1_input_fp)
        expected_aln = self.pynast_test1_expected_aln

        expected_names = ['1 description field 1..23', '2 1..23']
        self.assertEqual(actual_aln.Names, expected_names)
        self.assertEqual(actual_aln, expected_aln)

    def test_call_pynast_template_aln_with_lower(self):
        """PyNastAligner: functions when template alignment contains lower case
        """
        pynast_aligner = PyNastAligner({
                'template_filepath': self.pynast_test_template_w_lower_fp,
                'min_len': 15,
                })
        actual_aln = pynast_aligner(self.pynast_test1_input_fp)
        expected_aln = self.pynast_test1_expected_aln

        expected_names = ['1 description field 1..23', '2 1..23']
        self.assertEqual(actual_aln.Names, expected_names)
        self.assertEqual(actual_aln, expected_aln)

    def test_call_pynast_template_aln_with_U(self):
        """PyNastAligner: error message when template contains bad char
        """
        pynast_aligner = PyNastAligner({
                'template_filepath': self.pynast_test_template_w_u_fp,
                'min_len': 15,
                })
        # the T->U template is expected to make the aligner call raise KeyError
        self.assertRaises(KeyError,pynast_aligner,self.pynast_test1_input_fp)

    def test_call_pynast_alt_pairwise_method(self):
        """PyNastAligner: alternate pairwise alignment method produces correct alignment
        """
        aligner = PyNastAligner({
                'pairwise_alignment_method': 'muscle',
                'template_filepath': self.pynast_test1_template_fp,
                'min_len': 15,
                })
        actual_aln = aligner(self.pynast_test1_input_fp)
        expected_aln = self.pynast_test1_expected_aln
        self.assertEqual(actual_aln, expected_aln)

    def test_call_pynast_test1_alt_min_len(self):
        """PyNastAligner: returns no result when min_len too high
        """
        aligner = PyNastAligner({
                'template_filepath': self.pynast_test1_template_fp,
                'min_len':1000})

        actual_aln = aligner(\
         self.pynast_test1_input_fp)
        expected_aln = {}

        self.assertEqual(actual_aln, expected_aln)

    def test_call_pynast_test1_alt_min_pct(self):
        """PyNastAligner: returns no result when min_pct too high
        """
        aligner = PyNastAligner({
                'template_filepath': self.pynast_test1_template_fp,
                'min_len':15,
                'min_pct':100.0})

        actual_aln = aligner(self.pynast_test1_input_fp)
        expected_aln = {}

        self.assertEqual(actual_aln, expected_aln)

    def tearDown(self):
        """Remove every temp file recorded in self._paths_to_clean_up"""
        remove_files(self._paths_to_clean_up)



# ---- Test fixtures (fasta / stockholm text used by the test cases) ----

# Unaligned fasta input; written to the temp input file in
# CogentAlignerTests.setUp.
seqs_for_muscle= \
""">abc
ACACACAC
>def
ACAGACAC
>ghi
ACAGACACTT
>jkl
TTACAC"""

# Expected aligned fasta for seqs_for_muscle. The CogentAligner tests compare
# its lines to the actual output with assertEqualItems (order-insensitive).
expected_muscle_alignment = """>jkl\n--TTACAC--\n>abc\nACACACAC--\n>ghi\nACAGACACTT\n>def\nACAGACAC--\n"""

# Unaligned fasta input; written to the temp input file in
# InfernalAlignerTests.setUp.
infernal_test1_input_fasta = """>seq_1
ACTGCTAGCTAGTAGCGTACGTA
>seq_2
GCTACGTAGCTAC
>seq_3
GCGGCTATTAGATCGTA"""

# Stockholm-format template alignment (with an SS_cons line); written to the
# file passed to InfernalAligner as 'template_filepath'.
infernal_test1_template_stockholm = """# STOCKHOLM 1.0
seq_a           TAGGCTCTGATATAATAGC-TCTC---------
seq_b           ----TATCGCTTCGACGAT-TCTCTGATAGAGA
seq_c           ------------TGACTAC-GCAT---------
#=GC SS_cons    ............((.(....)))..........
//"""

# Expected alignment of infernal_test1_input_fasta; loaded with LoadSeqs as a
# DNA Alignment in InfernalAlignerTests.setUp.
infernal_test1_expected_alignment = """>seq_1
-----ACTGCTA-GCTAGTAGCGTACGTA----
>seq_2
--------GCTACG-TAGCTAC-----------
>seq_3
-----GCGGCTATTAGATC-GTA----------
"""

# Template alignment for the PyNAST tests. PyNastAlignerTests.setUp writes it
# as-is and also as variants with '-' -> '.', 'T' -> 'U', and lower-cased.
pynast_test1_template_fasta = """>1
ACGT--ACGTAC-ATA-C-----CC-T-G-GTA-G-T---
>2
AGGTTTACGTAG-ATA-C-----CC-T-G-GTA-G-T---
>3
AGGTACT-CCAC-ATA-C-----CC-T-G-GTA-G-T---
>4
TCGTTCGT-----ATA-C-----CC-T-G-GTA-G-T---
>5
ACGTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---
"""

# Unaligned input for the PyNAST tests; written to the temp input file in
# PyNastAlignerTests.setUp.
pynast_test1_input_fasta = """>1 description field
ACCTACGTTAATACCCTGGTAGT
>2
ACCTACGTTAATACCCTGGTAGT
>3
AA
"""

# Expected alignment from the default test aligner (min_len 15); loaded as a
# DenseAlignment in PyNastAlignerTests.setUp.
pynast_test1_expected_alignment = """>1 description field 1..23
ACCTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---
>2 1..23
ACCTACGT-TA--ATA-C-----CC-T-G-GTA-G-T---
"""

# Expected contents of the failure file for the default test aligner.
pynast_test1_expected_failure = """>3
AA
"""

# Run the unit tests when this file is executed as a script (not on import);
# main is imported from cogent.util.unit_test at the top of the file.
if __name__ == '__main__':
    main()