1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
#!/usr/bin/python3
"""
Runs DWGSIM_EVAL
usage: dwgsim_eval_wrapper.py [options]
-a,--alignmentScore=a: split alignments by alignment score instead of mapping quality
-b,--bwa=b: alignments are from BWA (SOLiD only)
-c,--colorSpace=c: color space alignments
-d,--scoreFactor=d: divide quality/alignment score by this factor
-g,--wiggle=g: gap "wiggle"
-n,--numReads=n: number of raw input paired-end reads (otherwise, inferred from all SAM records present).
-q,--minMapq=q: consider only alignments with this mapping quality or greater.
-z,--singleEnd=z: input contains only single end reads
-S,--sam=s: input SAM file
-B,--bam=B: input BAM file
-p,--printIncorrect=p: print incorrect alignments
-s,--numSnps=s: consider only alignments with the number of specified SNPs
-e,--numErrors=e: consider only alignments with the number of specified errors
-i,--indels=i: consider only alignments with indels
-o,--output=u: The file to save the output
"""
import optparse, os, shutil, subprocess, sys, tempfile
def stop_err( msg ):
sys.stderr.write( '%s\n' % msg )
sys.exit()
def run_process ( cmd, name, tmp_dir, buffsize ):
try:
tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
tmp_stderr = open( tmp, 'wb' )
proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
returncode = proc.wait()
tmp_stderr.close()
# get stderr, allowing for case where it's very large
tmp_stderr = open( tmp, 'rb' )
stderr = ''
try:
while True:
stderr += tmp_stderr.read( buffsize )
if not stderr or len( stderr ) % buffsize != 0:
break
except OverflowError:
pass
tmp_stderr.close()
if returncode != 0:
raise Exception(stderr)
except Exception as e:
raise Exception('Error in \'' + name + '\'. \n' + str( e ))
def check_output ( output, canBeEmpty ):
if 0 < os.path.getsize( output ):
return True
elif False == canBeEmpty:
raise Exception('The output file is empty:' + output)
def __main__():
#Parse Command Line
parser = optparse.OptionParser()
parser.add_option( '-a', '--alignmentScore', action='store_true', dest='alignmentScore', default=False, help='split alignments by alignment score instead of mapping quality' )
parser.add_option( '-b', '--bwa', action='store_true', dest='bwa', default=False, help='alignments are from BWA (SOLiD only)' )
parser.add_option( '-c', '--colorSpace', action='store_true', dest='colorSpace', default=False, help='generate reads in color space (SOLiD reads)' )
parser.add_option( '-d', '--scoreFactor', dest='scoreFactor', type='int', help='divide quality/alignment score by this factor' )
parser.add_option( '-g', '--wiggle', dest='wiggle', type='int', help='gap "wiggle"' )
parser.add_option( '-n', '--numReads', dest='numReads', type='int', help='number of raw input paired-end reads (otherwise, inferred from all SAM records present).' )
parser.add_option( '-q', '--minMapq', dest='minMapq', type='int', help='consider only alignments with this mapping quality or greater.' )
parser.add_option( '-z', '--singleEnd', action='store_true', dest='singleEnd', default=False, help='input contains only single end reads' )
parser.add_option( '-S', '--sam', dest='sam', default=None, help='input SAM' )
parser.add_option( '-B', '--bam', dest='bam', default=None, help='input BAM' )
parser.add_option( '-p', '--printIncorrect', action='store_true', dest='printIncorrect', default=False, help='print incorrect alignments' )
parser.add_option( '-s', '--numSnps', dest='numSnps', type="int", help='consider only alignments with the number of specified SNPs' )
parser.add_option( '-e', '--numErrors', dest='numErrors', type="int", default=False, help='consider only alignments with the number of specified errors' )
parser.add_option( '-i', '--indels', action='store_true', dest='indels', default=False, help='consider only alignments with indels' )
parser.add_option( '-o', '--output', dest='output', help='The file to save the output' )
(options, args) = parser.parse_args()
# output version # of tool
try:
tmp = tempfile.NamedTemporaryFile().name
tmp_stdout = open( tmp, 'wb' )
proc = subprocess.Popen( args='dwgsim_eval 2>&1', shell=True, stdout=tmp_stdout )
tmp_stdout.close()
returncode = proc.wait()
stdout = None
for line in open( tmp_stdout.name, 'rb' ):
if line.lower().find( 'version' ) >= 0:
stdout = line.strip()
break
if stdout:
sys.stdout.write( '%s\n' % stdout )
else:
raise Exception
except:
sys.stdout.write( 'Could not determine DWGSIM_EVAL version\n' )
buffsize = 1048576
# make temp directory for dwgsim, requires trailing slash
tmp_dir = '%s/' % tempfile.mkdtemp()
#'generic' options used in all dwgsim commands here
try:
tmp_dir = '%s/' % tempfile.mkdtemp()
dwgsim_eval_cmd = 'dwgsim_eval'
if True == options.alignmentScore:
dwgsim_eval_cmd = dwgsim_eval_cmd + ' -a'
if True == options.bwa:
dwgsim_eval_cmd = dwgsim_eval_cmd + ' -b'
if True == options.colorSpace:
dwgsim_eval_cmd = dwgsim_eval_cmd + ' -c'
use_p = False
if 0 <= options.numSnps:
use_p = True
dwgsim_eval_cmd = dwgsim_eval_cmd + (' -s %s' % options.numSnps)
if 0 <= options.numErrors:
use_p = True
dwgsim_eval_cmd = dwgsim_eval_cmd + (' -e %s' % options.numErrors)
if True == options.indels:
use_p = True
dwgsim_eval_cmd = dwgsim_eval_cmd + ' -i'
if True == use_p or True == options.printIncorrect:
dwgsim_eval_cmd = dwgsim_eval_cmd + ' -p'
if True == options.singleEnd:
dwgsim_eval_cmd = dwgsim_eval_cmd + ' -z'
dwgsim_eval_cmd = '%s -d %s -g %s -n %s -q %s' % (dwgsim_eval_cmd, \
options.scoreFactor, \
options.wiggle, \
options.numReads, \
options.minMapq)
if None != options.sam:
dwgsim_eval_cmd = dwgsim_eval_cmd + ' -S ' + options.sam
elif None != options.bam:
dwgsim_eval_cmd = dwgsim_eval_cmd + ' ' + options.bam
else:
raise Exception('Input file was neither a SAM nor BAM')
dwgsim_eval_cmd = dwgsim_eval_cmd + ' > ' + options.output
# need to nest try-except in try-finally to handle 2.4
try:
# dwgsim
run_process ( dwgsim_eval_cmd, 'dwgsim', tmp_dir, buffsize )
# check that there are results in the output file
check_output ( options.output, False )
sys.stdout.write( 'DWGSIM_EVAL successful' )
except Exception as e:
stop_err( 'DWGSIM_EVAL failed.\n' + str( e ) )
finally:
# clean up temp dir
if os.path.exists( tmp_dir ):
shutil.rmtree( tmp_dir )
if __name__=="__main__": __main__()
|