File: utils.py

package info (click to toggle)
qcumber 2.3.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 2,276 kB
  • sloc: python: 3,097; sh: 153; makefile: 18
file content (68 lines) | stat: -rwxr-xr-x 2,009 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python3
import fileinput
import matplotlib.pyplot as plt
import numpy

def which(program):
    import os
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, fname = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        for path in os.environ["PATH"].split(os.pathsep):
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file

    return None

def calculate_insert_length(samfile, statsfile, imagefile, insertsizefile):
    minlen=1000000
    maxlen=0
    totlen = 0
    lines = 0
    alllens = []
    for line in fileinput.input(samfile):
        if line[0] == "@":
            continue
        dat = line.split("\t")
        if dat[8] == "0":
            continue
        inslen = abs(int(dat[8])) 
        alllens += [inslen]
        totlen += inslen
        lines += 1
        if inslen < minlen:
            minlen = inslen
        if inslen > maxlen:
            maxlen = inslen
    try:
        avg_insert = float(totlen)/float(lines)
        f = open(statsfile, "w")
        f.write("Average\tMinimum\tMaximum\n")
        f.write(str(avg_insert) + "\t" + str(minlen) + "\t" + str(maxlen))
        f.close()
        topoutlier = numpy.percentile(alllens, 75)+1.5*(numpy.percentile(alllens, 75)-numpy.percentile(alllens, 25))
        cutlens = [x for x in alllens if x <= 5*topoutlier]
        f = open(insertsizefile, "w")
        f.write(",".join([str(x) for x in alllens]))
        f.close()
        plt.hist(cutlens, bins=100)
        plt.xlabel("Template length")
        plt.ylabel("Density of reads")
        plt.savefig(imagefile)
    except:
        f = open(statsfile, "w")
        f.write("Average\tMinimum\tMaximum\n")
        f.write("0\t0\t0")
        f.close()
        f = open(imagefile, "w")
        f.write(" ")
        f.close()
        f = open(insertsizefile, "w")
        f.write("0,0")
        f.close()