1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
import unittest
import shutil
import os
import subprocess
from Bio import SeqIO
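# Input dimensions exercised by the tests below:
#   read layout:  single, left/right (paired)
#   format:       fasta, fastq
#   file count:   one file, two files
#   strand:       forward, reverse
#   compression:  clear (uncompressed), gzip, bzip2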
class TestTrinityPrepFlag(unittest.TestCase):
    """Integration tests for the read-preparation step of ``Trinity --prep``.

    Every test shells out to ``Trinity`` with fixture read files taken from
    the current working directory, then counts the FASTA records Trinity
    left in ``trinity_out_dir``.  The inputs vary by layout (single vs.
    left/right), format (fasta/fastq), number of files, strand orientation
    and compression.
    """

    OUTPUT_DIR = 'trinity_out_dir'
    LOG_FILE = 'coverage.log'
    # Record count the tests expect from one fixture read file; every
    # expected total below is a multiple of it (one per input file).
    READS_PER_FILE = 30575

    @classmethod
    def setUpClass(cls):
        """Skip the class without Trinity; otherwise start a fresh log."""
        # Without the executable every test would error on a missing output
        # file, which hides the real cause.
        if not cls._on_path('Trinity'):
            raise unittest.SkipTest("Trinity executable not found on PATH")
        try:
            os.remove(cls.LOG_FILE)
        except OSError:
            # Best effort: the log usually does not exist on a first run.
            pass

    def tearDown(self):
        """Remove Trinity's output directory so tests do not see stale data."""
        shutil.rmtree(self.OUTPUT_DIR, ignore_errors=True)

    # ---- single-end, fastq ------------------------------------------------

    def test_fastq(self):
        self.trinity("left1.fq", "fq")
        self.assertEqual(self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fastq_gz(self):
        self.trinity("left1.fq.gz", "fq")
        self.assertEqual(self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fastq_bz2(self):
        self.trinity("left1.fq.bz2", "fq")
        self.assertEqual(self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fastq_multiple_files_single(self):
        self.trinity("left1.fq,left1.fq.gz", "fq")
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fastq_multiple_files_single_bz2(self):
        self.trinity("left1.fq.bz2,left1.fq.gz", "fq")
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fastq_multiple_files_single_reverse(self):
        self.trinity("left1.fq,left1.fq.gz", "fq", True)
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    # ---- single-end, fasta ------------------------------------------------

    def test_fasta(self):
        self.trinity("left1.fa")
        self.assertEqual(self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fasta_gz(self):
        self.trinity("left1.fa.gz")
        self.assertEqual(self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fasta_multiple_files_single(self):
        self.trinity("left1.fa,left1.fa.gz")
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fasta_multiple_files_single_reverse(self):
        self.trinity("left1.fa,left1.fa.gz", reverse=True)
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    # ---- paired-end, fastq ------------------------------------------------

    def test_paired_fastq(self):
        self.trinity("left1.fq", "fq", morefiles="right1.fq")
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_paired_fastq_gz(self):
        self.trinity("left1.fq.gz", "fq", morefiles="right1.fq.gz")
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fastq_multiple_files_paired(self):
        self.trinity("left1.fq,left1.fq.gz", "fq", morefiles="right1.fq,right1.fq.gz")
        self.assertEqual(4 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fastq_multiple_files_paired_reverse(self):
        self.trinity("left1.fq,left1.fq.gz", "fq", reverse=True, morefiles="right1.fq,right1.fq.gz")
        self.assertEqual(4 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    # ---- paired-end, fasta ------------------------------------------------

    def test_fasta_paired(self):
        self.trinity("left1.fa", morefiles="right1.fa")
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_paired_sequences_have_1_or_2_extension(self):
        self.trinity("sra_test.fq", morefiles="sra_test2.fq", seqtype='fq')
        self.assertEqual(0, self.count_bad_endings(), "Found sequences with bad endings")

    def test_fasta_gz_paired(self):
        self.trinity("left1.fa.gz", morefiles="right1.fa.gz")
        self.assertEqual(2 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fasta_multiple_files_paired(self):
        # NOTE(review): this test used to be shadowed by a second method of
        # the same name and never ran; its expectation was 61150.  Four
        # input files are passed, and both the reverse variant and the fastq
        # counterpart expect 122300, so the total is aligned with them --
        # confirm against a real Trinity run.
        self.trinity("left1.fa,left1.fa.gz", morefiles="right1.fa,right1.fa.gz")
        self.assertEqual(4 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    def test_fasta_multiple_files_paired_reverse(self):
        self.trinity("left1.fa,left1.fa.gz", morefiles="right1.fa,right1.fa.gz", reverse=True)
        self.assertEqual(4 * self.READS_PER_FILE, self.count_seqs(), "Unexpected sequence count")

    # ---- helpers ----------------------------------------------------------

    @staticmethod
    def _on_path(program):
        """Return True if *program* is an executable file in a PATH directory."""
        for directory in os.environ.get("PATH", "").split(os.pathsep):
            candidate = os.path.join(directory, program)
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return True
        return False

    @staticmethod
    def _command_line(files, seqtype='fa', reverse=False, morefiles=None):
        """Build the Trinity shell command for the given inputs.

        files     -- comma-separated read files; passed as ``--left`` when
                     *morefiles* is given, otherwise as ``--single``.
        seqtype   -- value for ``--seqType`` ('fa' or 'fq' in these tests).
        reverse   -- when true, append ``--SS_lib_type`` ('RF' for paired
                     input, 'R' for single input).
        morefiles -- comma-separated read files passed as ``--right``.
        """
        if morefiles:
            inputs = "--left %s --right %s" % (files, morefiles)
        else:
            inputs = "--single %s" % files
        cmdline = ("Trinity %s --prep --seqType %s --max_memory 2G "
                   "--no_version_check --no_normalize_reads" % (inputs, seqtype))
        if reverse:
            cmdline += " --SS_lib_type " + ('RF' if morefiles else 'R')
        return cmdline

    def trinity(self, files, seqtype='fa', reverse=False, morefiles=None):
        """Run ``Trinity --prep`` on the inputs, appending stdout to the log.

        Parameters are described on ``_command_line``.  Trinity's exit status
        is not checked; a failed run shows up when the counting helpers
        cannot open the expected output file.
        """
        cmdline = self._command_line(files, seqtype, reverse, morefiles)
        # Single-argument form prints the same text on Python 2 and 3.
        print("Command line: " + cmdline)
        with open(self.LOG_FILE, 'a') as file_out:
            subprocess.call(cmdline, shell=True, stdout=file_out)

    def _prepared_reads_path(self):
        """Return the path of ``single.fa`` if present, else ``both.fa``."""
        single = os.path.join(self.OUTPUT_DIR, 'single.fa')
        if os.path.isfile(single):
            return single
        return os.path.join(self.OUTPUT_DIR, 'both.fa')

    def count_seqs(self):
        """Return the number of FASTA records in the prepared reads file."""
        # Plain text mode: the "U" flag was removed in Python 3.11.
        with open(self._prepared_reads_path()) as handle:
            return sum(1 for _ in SeqIO.parse(handle, "fasta"))

    def count_bad_endings(self):
        """Return how many prepared records have an id not ending in /1 or /2."""
        with open(self._prepared_reads_path()) as handle:
            return sum(
                1 for record in SeqIO.parse(handle, "fasta")
                if not record.id.endswith(('/1', '/2'))
            )
|