1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
|
# Copyright 2013 by Saket Choudhary. Based on test_Clustalw_tool.py by Peter
# Cock .
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
from Bio import MissingExternalDependencyError
import sys
import os
import unittest
# TODO from Bio.Sequencing.Applications import BwaBwaswCommandline
from Bio.Sequencing.Applications import BwaIndexCommandline
from Bio.Sequencing.Applications import BwaAlignCommandline
from Bio.Sequencing.Applications import BwaSamseCommandline
from Bio.Sequencing.Applications import BwaSampeCommandline
#################################################################
# Try to avoid problems when the OS is in another language
os.environ['LANG'] = 'C'
bwa_exe = None
if sys.platform == "win32":
# TODO - Check the path?
try:
# This can vary depending on the Windows language.
prog_files = os.environ["PROGRAMFILES"]
except KeyError:
prog_files = r"C:\Program Files"
likely_dirs = ["bwa", "bwa-0.6.2", ""]
likely_exes = ["bwa"]
for folder in likely_dirs:
if os.path.isdir(os.path.join(prog_files, folder)):
for filename in likely_exes:
if os.path.isfile(os.path.join(prog_files, folder, filename)):
bwa_exe = os.path.join(prog_files, folder, filename)
break
if bwa_exe:
break
else:
from Bio._py3k import getoutput
output = getoutput("bwa")
# Since "not found" may be in another language, try and be sure this is
# really the bwa tool's output
bwa_found = False
if "not found" not in output and "bwa" in output \
and "alignment via Burrows-Wheeler transformation" in output:
bwa_exe = "bwa"
skip_aln_tests = False
aln_output = getoutput("bwa aln")
if "unrecognized" in aln_output:
skip_aln_tests = True
print("'bwa aln' is unrecognized, skipping aln/samse/sampe tests")
if not bwa_exe:
raise MissingExternalDependencyError("Install bwa and correctly set"
" the file path to the program if"
" you want to use it from Biopython")
class BwaTestCase(unittest.TestCase):
"""Class for implementing BWA test cases"""
def setUp(self):
self.reference_file = "BWA/human_g1k_v37_truncated.fasta"
self.reference_extensions = ['amb', 'ann', 'bwt', 'pac', 'sa']
self.infile1 = "BWA/HNSCC1_1_truncated.fastq"
self.infile2 = "BWA/HNSCC1_2_truncated.fastq"
self.saifile1 = "BWA/1.sai"
self.saifile2 = "BWA/2.sai"
self.samfile1 = "BWA/1.sam"
self.samfile2 = "BWA/2.sam"
self.samfile = "BWA/out.sam"
self.files_to_clean = [self.saifile1, self.saifile2,
self.samfile1, self.samfile2,
self.samfile]
def tearDown(self):
for filename in self.files_to_clean:
if os.path.isfile(filename):
os.remove(filename)
for extension in self.reference_extensions:
index_file = self.reference_file + "." + extension
if os.path.exists(index_file):
os.remove(index_file)
def test_index(self):
"""Test for creating index files for the reference genome fasta file"""
cmdline = BwaIndexCommandline(bwa_exe)
cmdline.set_parameter("infile", self.reference_file)
cmdline.set_parameter("algorithm", "bwtsw")
stdout, stderr = cmdline()
for extension in self.reference_extensions:
index_file = self.reference_file + "." + extension
self.assertTrue(os.path.exists(index_file),
"Index File %s not found"
% (index_file))
self.assertTrue(stdout.startswith("[bwt_gen]"),
"FASTA indexing failed:\n%s\nStdout:%s"
% (cmdline, stdout))
def do_aln(self, in_file, out_file):
"""Test for generating sai files given the reference and read file"""
cmdline = BwaAlignCommandline(bwa_exe)
cmdline.set_parameter("reference", self.reference_file)
cmdline.read_file = in_file
self.assertTrue(os.path.isfile(in_file))
stdout, stderr = cmdline(stdout=out_file)
self.assertTrue("fail to locate the index" not in stderr,
"Error aligning sequence to reference:\n%s\nStderr:%s"
% (cmdline, stderr))
def create_fasta_index(self):
"""Creates index for fasta file.
BWA requires an indexed fasta for each alignment operation.
This should be called to create an index before any alignment
operation.
"""
cmdline = BwaIndexCommandline(bwa_exe)
cmdline.set_parameter("infile", self.reference_file)
cmdline.set_parameter("algorithm", "bwtsw")
stdout, stderr = cmdline()
if not skip_aln_tests:
def test_samse(self):
"""Test for single end sequencing """
self.create_fasta_index()
self.do_aln(self.infile1, self.saifile1)
cmdline = BwaSamseCommandline(bwa_exe)
cmdline.set_parameter("reference", self.reference_file)
cmdline.set_parameter("read_file", self.infile1)
cmdline.set_parameter("sai_file", self.saifile1)
stdout, stderr = cmdline(stdout=self.samfile1)
with open(self.samfile1, "r") as handle:
headline = handle.readline()
self.assertTrue(headline.startswith("@SQ"),
"Error generating sam files:\n%s\nOutput starts:%s"
% (cmdline, headline))
def test_sampe(self):
"""Test for generating samfile by paired end sequencing"""
self.create_fasta_index()
# Generate sai files from paired end data
self.do_aln(self.infile1, self.saifile1)
self.do_aln(self.infile2, self.saifile2)
cmdline = BwaSampeCommandline(bwa_exe)
cmdline.set_parameter("reference", self.reference_file)
cmdline.set_parameter("sai_file1", self.saifile1)
cmdline.set_parameter("sai_file2", self.saifile2)
cmdline.set_parameter("read_file1", self.infile1)
cmdline.set_parameter("read_file2", self.infile2)
stdout, stderr = cmdline(stdout=self.samfile)
with open(self.samfile, "r") as handle:
headline = handle.readline()
self.assertTrue(headline.startswith("@SQ"),
"Error generating sam files:\n%s\nOutput starts:%s"
% (cmdline, headline))
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)
unittest.main(testRunner=runner)
|