#!/usr/bin/python3
import fileinput
import matplotlib.pyplot as plt
import numpy

def which(program):
    import os
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, fname = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        for path in os.environ["PATH"].split(os.pathsep):
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file

    return None

def calculate_insert_length(samfile, statsfile, imagefile, insertsizefile):
    minlen=1000000
    maxlen=0
    totlen = 0
    lines = 0
    alllens = []
    for line in fileinput.input(samfile):
        if line[0] == "@":
            continue
        dat = line.split("\t")
        if dat[8] == "0":
            continue
        inslen = abs(int(dat[8])) 
        alllens += [inslen]
        totlen += inslen
        lines += 1
        if inslen < minlen:
            minlen = inslen
        if inslen > maxlen:
            maxlen = inslen
    try:
        avg_insert = float(totlen)/float(lines)
        f = open(statsfile, "w")
        f.write("Average\tMinimum\tMaximum\n")
        f.write(str(avg_insert) + "\t" + str(minlen) + "\t" + str(maxlen))
        f.close()
        topoutlier = numpy.percentile(alllens, 75)+1.5*(numpy.percentile(alllens, 75)-numpy.percentile(alllens, 25))
        cutlens = [x for x in alllens if x <= 5*topoutlier]
        f = open(insertsizefile, "w")
        f.write(",".join([str(x) for x in alllens]))
        f.close()
        plt.hist(cutlens, bins=100)
        plt.xlabel("Template length")
        plt.ylabel("Density of reads")
        plt.savefig(imagefile)
    except:
        f = open(statsfile, "w")
        f.write("Average\tMinimum\tMaximum\n")
        f.write("0\t0\t0")
        f.close()
        f = open(imagefile, "w")
        f.write(" ")
        f.close()
        f = open(insertsizefile, "w")
        f.write("0,0")
        f.close()
