1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
#!/usr/bin/python3
import fileinput
import matplotlib.pyplot as plt
import numpy
def which(program):
import os
def is_exe(fpath):
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
fpath, fname = os.path.split(program)
if fpath:
if is_exe(program):
return program
else:
for path in os.environ["PATH"].split(os.pathsep):
exe_file = os.path.join(path, program)
if is_exe(exe_file):
return exe_file
return None
def calculate_insert_length(samfile, statsfile, imagefile, insertsizefile):
minlen=1000000
maxlen=0
totlen = 0
lines = 0
alllens = []
for line in fileinput.input(samfile):
if line[0] == "@":
continue
dat = line.split("\t")
if dat[8] == "0":
continue
inslen = abs(int(dat[8]))
alllens += [inslen]
totlen += inslen
lines += 1
if inslen < minlen:
minlen = inslen
if inslen > maxlen:
maxlen = inslen
try:
avg_insert = float(totlen)/float(lines)
f = open(statsfile, "w")
f.write("Average\tMinimum\tMaximum\n")
f.write(str(avg_insert) + "\t" + str(minlen) + "\t" + str(maxlen))
f.close()
topoutlier = numpy.percentile(alllens, 75)+1.5*(numpy.percentile(alllens, 75)-numpy.percentile(alllens, 25))
cutlens = [x for x in alllens if x <= 5*topoutlier]
f = open(insertsizefile, "w")
f.write(",".join([str(x) for x in alllens]))
f.close()
plt.hist(cutlens, bins=100)
plt.xlabel("Template length")
plt.ylabel("Density of reads")
plt.savefig(imagefile)
except:
f = open(statsfile, "w")
f.write("Average\tMinimum\tMaximum\n")
f.write("0\t0\t0")
f.close()
f = open(imagefile, "w")
f.write(" ")
f.close()
f = open(insertsizefile, "w")
f.write("0,0")
f.close()
|