1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
|
import subprocess
from Bio import SeqIO
import unittest
import shutil
import os
import time
import filecmp
# Prereqs:
# module load bowtie/0.12.8
# module load java
# module load samtools
# Trinity
# Copy the .gz files in sample_data/test_Trinity_Assembly to current directory
# Run using nosetests
CPUT = len(os.sched_getaffinity(0))
MEM_FLAG = "--max_memory 2G"
TEMP_FILES = ['both.fa', 'inchworm.K25.L25.fa', 'jellyfish.kmers.fa']
class TestTrinity(unittest.TestCase):
@classmethod
def setUpClass(cls):
try:
os.remove('coverage.log')
except:
pass
def tearDown(self):
shutil.rmtree('trinity_out_dir', True)
def test_sample_data_seq_count(self):
print "When assembling the sample data, the number of sequences assembled should be between 75 and 100"
self.trinity(
"Trinity --seqType fq %s --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU %d --no_cleanup" % (MEM_FLAG, CPU))
handle = open("trinity_out_dir/Trinity.fasta", "rU")
seq_count = len([x for x in SeqIO.parse(handle, "fasta")])
handle.close()
self.assertTrue(85 <= seq_count <= 110, msg='Found %s sequences' % seq_count)
def test_sample_data_trimmed_and_normalized(self):
print "When assembling the sample data with the --trimmomatic --normalize_reads flags, the number of sequences assembled should be between 75 and 85"
self.trinity(
"Trinity --seqType fq %s --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU %d --trimmomatic --normalize_reads --no_cleanup" % (MEM_FLAG, CPU))
handle = open("trinity_out_dir/Trinity.fasta", "rU")
seq_count = len([x for x in SeqIO.parse(handle, "fasta")])
handle.close()
self.assertTrue(85 <= seq_count <= 100, msg='Found %s sequences' % seq_count)
def test_no_cleanup_leaves_temp_files(self):
print "The --no_cleanup flag should ensure that the output directory is left behind"
self.trinity(
"Trinity --seqType fq %s --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU %d --no_cleanup" % (MEM_FLAG, CPU))
for f in TEMP_FILES:
self.assertTrue(os.path.exists("trinity_out_dir/%s" % f), msg="%s not found with no_cleanup" % f)
def test_cleanup_removes_temp_files(self):
print "The --full_cleanup flag should ensure that the output directory is gone but the output file remains"
self.trinity(
"Trinity --seqType fq %s --left reads.left.fq.gz,reads2.left.fq.gz --right reads.right.fq.gz,reads2.right.fq.gz --SS_lib_type RF --CPU %d --full_cleanup" % (MEM_FLAG, CPU))
time.sleep(5) # Make sure the system has time to recognize the directory is gone
self.assertFalse(os.path.exists("trinity_out_dir"), msg="Did full_cleanup but trinity_out_dir exists")
self.assertTrue(os.path.isfile("trinity_out_dir.Trinity.fasta"),
msg="Did full_cleanup but output file not created")
def test_single_end_with_rf_lib_type_error(self):
print "Single reads with an SS_lib_type of RF should result in an error"
try:
subprocess.call("Trinity --seqType fq --single reads.left.fq --SS_lib_type RF", shell=True)
except subprocess.CalledProcessError as e:
self.assertTrue("Error, with --single reads, the --SS_lib_type can be 'F' or 'R' only." in e.output)
def test_single_end_with_fq(self):
print "Single reads with FQ file should succeed"
self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F" % MEM_FLAG)
def test_no_run_chrysalis(self):
print "The --no_run_chrysalis flag should result in no chrysalis subdirectory in the output directory"
self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_run_chrysalis" % MEM_FLAG)
self.assertEquals(0, len(os.listdir('trinity_out_dir/chrysalis')))
def test_no_run_inchworm(self):
print "The --no_run_inchworm flag should result in no inchworm.finished file"
self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_run_inchworm" % MEM_FLAG)
self.assertFalse(os.path.isfile("trinity_out_dir/inchworm.K25.L25.fa.finished"),
msg="Inchworm appears to have run although no_run_inchworm was specified")
self.assertTrue(os.path.isfile("trinity_out_dir/jellyfish.kmers.fa"),
msg="jellyfish.kmers.fa was not created")
def test_no_bowtie(self):
print "The --no_bowtie flag should result in no bowtie.nameSorted.bam file"
self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_bowtie" % MEM_FLAG)
self.assertFalse(os.path.isfile("trinity_out_dir/bowtie.nameSorted.bam"),
msg="Bowtie appears to have run although no_bowtie was specified")
def test_no_distributed_trinity_exec(self):
print "The --no_distributed_trinity_exec flag should run Jellyfish but not create an output file"
self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --no_distributed_trinity_exec" % MEM_FLAG)
self.assertTrue(os.path.isfile("trinity_out_dir/inchworm.K25.L25.fa.finished"),
msg="Inchworm did not appear to run with no_distributed_trinity_exec flag")
self.assertTrue(os.path.isfile("trinity_out_dir/jellyfish.kmers.fa.histo"),
msg="Jellyfish did not appear to run with no_distributed_trinity_exec flag")
self.assertFalse(os.path.isfile("trinity_out_dir/Trinity.fasta"),
msg="Trinity.fasta created with no_distributed_trinity_exec")
def test_single_end_with_fa_and_reverse(self):
print "The --no_distributed_trinity_exec flag should run Jellyfish but not create an output file"
self.fq2fa()
self.trinity("Trinity %s --seqType fa --single reads.fa --SS_lib_type R" % MEM_FLAG)
def test_output_correctly_changes_dir(self):
print "The --output flag should change the output directory"
shutil.rmtree('trinity_test', True)
self.trinity("Trinity %s --seqType fq --single reads.left.fq --SS_lib_type F --output trinity_test" % MEM_FLAG)
self.assertTrue(os.path.exists("trinity_test"), msg="Changed output directory but it was not created")
shutil.rmtree('trinity_test', True)
def test_scaffold_iworm_contigs(self):
print "scaffold_iworm_contigs works as expected"
os.environ['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH'] + ':../src/trinity-plugins/htslib'
exe = "../src/trinity-plugins/scaffold_iworm_contigs/scaffold_iworm_contigs"
bamfile = "iworm.bowtie.nameSorted.bam"
ifile = "inchworm.K25.L25.fa"
f = subprocess.check_output([exe, bamfile, ifile]).split('\n')
expected_result = [['a340;25', '339', 'a9;40', '8', '41'],
['a719;8', '718', 'a832;15', '831', '33'],
['a1;43', '0', 'a346;23', '345', '31'],
['a346;23', '345', 'a9;40', '8', '26'],
['a339;142', '338', 'a37;14', '36', '25'],
['a3;61', '2', 'a432;9', '431', '23'],
['a345;34', '344', 'a40;12', '39', '21'],
['a354;96', '353', 'a368;13', '367', '18'],
['a689;4', '688', 'a774;6', '773', '13']]
actual_result = [line.split('\t') for line in f if line]
self.assertEquals(expected_result, actual_result[0:9])
actual_lengths = [int(s[4].strip()) for s in actual_result]
expected_order = list(reversed(sorted(actual_lengths)))
self.assertEquals(expected_order, actual_lengths)
def test_Inchworm_handles_compressed_files(self):
print "A compressed single file should be handlred correctly by Inchworm"
self.trinity('Trinity %s --seqType fq --single reads.left.fq.gz --SS_lib_type F --no_run_chrysalis' % MEM_FLAG);
num_lines = sum(1 for line in open('trinity_out_dir/inchworm.K25.L25.fa'))
self.assertTrue(2850 <= num_lines <= 3100, msg='Found %s lines' % num_lines)
def test_QuantifyGraph_works(self):
exe = "../src/Chrysalis/QuantifyGraph"
args = " -g Cbin0/c0.graph.tmp -i Cbin0/c0.reads.tmp -o c0.graph.out -max_reads 200000 -k 24"
result = subprocess.call(exe + args,shell=True)
self.assertEquals(0, result, "QuantifyGraph failed")
self.assertTrue(filecmp.cmp("c0.graph.out", "Cbin0/c0.graph.out.master", shallow=False), "Unexpected result from QuantifyGraph")
### information tests
def test_cite(self):
print "Cite flag should return citing information"
expected = '\n\n* Trinity:\nFull-length transcriptome assembly from RNA-Seq data without a reference genome.\nGrabherr MG, Haas BJ, Yassour M, Levin JZ, Thompson DA, Amit I, Adiconis X, Fan L,\nRaychowdhury R, Zeng Q, Chen Z, Mauceli E, Hacohen N, Gnirke A, Rhind N, di Palma F,\nBirren BW, Nusbaum C, Lindblad-Toh K, Friedman N, Regev A.\nNature Biotechnology 29, 644\xe2\x80\x93652 (2011)\nPaper: http://www.nature.com/nbt/journal/v29/n7/full/nbt.1883.html\nCode: http://trinityrnaseq.sf.net\n\n\n'
cite = subprocess.check_output(["Trinity", "--cite"])
self.assertEqual(expected, cite)
def test_version(self):
print "Version flag should return version information"
try:
subprocess.check_output(["Trinity", "--version"])
self.fail("Version returned 0 errorcode!")
except subprocess.CalledProcessError as e:
self.assertTrue('Trinity version: __TRINITY_VERSION_TAG__' in e.output)
self.assertTrue('using Trinity devel version. Note, latest production release is: v2.1.1' in e.output)
def test_show_full_usage_info(self):
print "show_full_usage_info flag has several option sections"
try:
subprocess.check_output(["Trinity", "--show_full_usage_info"])
except subprocess.CalledProcessError as e:
self.assertTrue("Inchworm and K-mer counting-related options" in e.output)
self.assertTrue("Chrysalis-related options" in e.output)
self.assertTrue("Butterfly-related options" in e.output)
self.assertTrue("Quality Trimming Options" in e.output)
self.assertTrue("In silico Read Normalization Options" in e.output)
### Invalid command line tests
def test_no_JM_specified_error(self):
print "max_memory flag is required"
error = self.get_error("Trinity --seqType fq --single reads.left.fq --SS_lib_type F")
self.assertTrue("Error, must specify max memory for jellyfish to use, eg. --max_memory 10G" in error)
def test_invalid_option_error(self):
print "Invalid options result in an error"
error = self.get_error("Trinity --squidward")
self.assertTrue("ERROR, don't recognize parameter: --squidward" in error)
def test_set_no_cleanup_and_full_cleanup_error(self):
print "Setting no_cleanup and full_cleanup together results in an error"
error = self.get_error("Trinity --no_cleanup --full_cleanup")
self.assertTrue("cannot set --no_cleanup and --full_cleanup as they contradict" in error)
### Helper methods
def trinity(self, cmdline):
print "Command line:", cmdline
with open("coverage.log", 'a') as file_out:
file_out.write("COMMAND: %s\n" % cmdline)
file_out.flush()
subprocess.call(cmdline,shell=True, stdout=file_out)
file_out.write("TEST COMPLETE\n")
def get_error(self, cmd):
try:
subprocess.check_output(cmd.split(' '))
except subprocess.CalledProcessError as e:
return e.output
def fq2fa(self):
handle = open("reads.left.fq", "rU")
records = [x for x in SeqIO.parse(handle, "fastq")]
handle.close()
SeqIO.write(records, "reads.fa", "fasta")
|