File: test_BWA_tool.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 46,860 kB
  • ctags: 13,237
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (171 lines) | stat: -rw-r--r-- 6,982 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# Copyright 2013 by Saket Choudhary.  Based on test_Clustalw_tool.py by Peter
# Cock .
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

from Bio import MissingExternalDependencyError
import sys
import os
import unittest
# TODO from Bio.Sequencing.Applications import BwaBwaswCommandline
from Bio.Sequencing.Applications import BwaIndexCommandline
from Bio.Sequencing.Applications import BwaAlignCommandline
from Bio.Sequencing.Applications import BwaSamseCommandline
from Bio.Sequencing.Applications import BwaSampeCommandline


#################################################################

# Try to avoid problems when the OS is in another language
os.environ['LANG'] = 'C'

bwa_exe = None
if sys.platform == "win32":
    # TODO - Check the path?
    try:
        # This can vary depending on the Windows language.
        prog_files = os.environ["PROGRAMFILES"]
    except KeyError:
        prog_files = r"C:\Program Files"
    likely_dirs = ["bwa", "bwa-0.6.2", ""]
    likely_exes = ["bwa"]
    for folder in likely_dirs:
        if os.path.isdir(os.path.join(prog_files, folder)):
            for filename in likely_exes:
                if os.path.isfile(os.path.join(prog_files, folder, filename)):
                    bwa_exe = os.path.join(prog_files, folder, filename)
                    break
            if bwa_exe:
                break
else:
    from Bio._py3k import getoutput
    output = getoutput("bwa")

    # Since "not found" may be in another language, try and be sure this is
    # really the bwa tool's output
    bwa_found = False
    if "not found" not in output and "bwa" in output \
            and "alignment via Burrows-Wheeler transformation" in output:
        bwa_exe = "bwa"
    skip_aln_tests = False
    aln_output = getoutput("bwa aln")
    if "unrecognized" in aln_output:
        skip_aln_tests = True
        print("'bwa aln' is unrecognized, skipping aln/samse/sampe tests")

if not bwa_exe:
    raise MissingExternalDependencyError("Install bwa and correctly set"
                                         " the file path to the program if"
                                         " you want to use it from Biopython")


class BwaTestCase(unittest.TestCase):
    """Class for implementing BWA test cases"""
    def setUp(self):
        self.reference_file = "BWA/human_g1k_v37_truncated.fasta"
        self.reference_extensions = ['amb', 'ann', 'bwt', 'pac', 'sa']
        self.infile1 = "BWA/HNSCC1_1_truncated.fastq"
        self.infile2 = "BWA/HNSCC1_2_truncated.fastq"
        self.saifile1 = "BWA/1.sai"
        self.saifile2 = "BWA/2.sai"
        self.samfile1 = "BWA/1.sam"
        self.samfile2 = "BWA/2.sam"
        self.samfile = "BWA/out.sam"
        self.files_to_clean = [self.saifile1, self.saifile2,
                               self.samfile1, self.samfile2,
                               self.samfile]

    def tearDown(self):
        for filename in self.files_to_clean:
            if os.path.isfile(filename):
                os.remove(filename)
        for extension in self.reference_extensions:
            index_file = self.reference_file + "." + extension
            if os.path.exists(index_file):
                os.remove(index_file)

    def test_index(self):
        """Test for creating index files for the reference genome fasta file"""
        cmdline = BwaIndexCommandline(bwa_exe)
        cmdline.set_parameter("infile", self.reference_file)
        cmdline.set_parameter("algorithm", "bwtsw")
        stdout, stderr = cmdline()
        for extension in self.reference_extensions:
            index_file = self.reference_file + "." + extension
            self.assertTrue(os.path.exists(index_file),
                            "Index File %s not found"
                            % (index_file))
        self.assertTrue(stdout.startswith("[bwt_gen]"),
                        "FASTA indexing failed:\n%s\nStdout:%s"
                        % (cmdline, stdout))

    def do_aln(self, in_file, out_file):
        """Test for generating sai files given the reference and read file"""
        cmdline = BwaAlignCommandline(bwa_exe)
        cmdline.set_parameter("reference", self.reference_file)
        cmdline.read_file = in_file
        self.assertTrue(os.path.isfile(in_file))
        stdout, stderr = cmdline(stdout=out_file)

        self.assertTrue("fail to locate the index" not in stderr,
                        "Error aligning sequence to reference:\n%s\nStderr:%s"
                        % (cmdline, stderr))

    def create_fasta_index(self):
        """Creates index for fasta file.

        BWA requires an indexed fasta for each alignment operation.
        This should be called to create an index before any alignment
        operation.
        """
        cmdline = BwaIndexCommandline(bwa_exe)
        cmdline.set_parameter("infile", self.reference_file)
        cmdline.set_parameter("algorithm", "bwtsw")
        stdout, stderr = cmdline()

    if not skip_aln_tests:

        def test_samse(self):
            """Test for single end sequencing """
            self.create_fasta_index()
            self.do_aln(self.infile1, self.saifile1)
            cmdline = BwaSamseCommandline(bwa_exe)
            cmdline.set_parameter("reference", self.reference_file)
            cmdline.set_parameter("read_file", self.infile1)
            cmdline.set_parameter("sai_file", self.saifile1)
            stdout, stderr = cmdline(stdout=self.samfile1)

            with open(self.samfile1, "r") as handle:
                headline = handle.readline()
            self.assertTrue(headline.startswith("@SQ"),
                            "Error generating sam files:\n%s\nOutput starts:%s"
                            % (cmdline, headline))

        def test_sampe(self):
            """Test for generating samfile by paired end sequencing"""
            self.create_fasta_index()

            # Generate sai files from paired end data
            self.do_aln(self.infile1, self.saifile1)
            self.do_aln(self.infile2, self.saifile2)

            cmdline = BwaSampeCommandline(bwa_exe)
            cmdline.set_parameter("reference", self.reference_file)
            cmdline.set_parameter("sai_file1", self.saifile1)
            cmdline.set_parameter("sai_file2", self.saifile2)
            cmdline.set_parameter("read_file1", self.infile1)
            cmdline.set_parameter("read_file2", self.infile2)
            stdout, stderr = cmdline(stdout=self.samfile)

            with open(self.samfile, "r") as handle:
                headline = handle.readline()
            self.assertTrue(headline.startswith("@SQ"),
                            "Error generating sam files:\n%s\nOutput starts:%s"
                            % (cmdline, headline))


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)