File: get_coverages.py

package info (click to toggle)
lumpy-sv 0.3.1%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 296,072 kB
  • sloc: cpp: 9,908; python: 1,768; sh: 1,384; makefile: 365; ansic: 322; perl: 58
file content (67 lines) | stat: -rwxr-xr-x 1,823 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/local/bin/python
import sys
import subprocess
import os
import numpy as np

if len(sys.argv) < 2:
    print('usage:' + sys.argv[0] + ' <in bam 1> <in bam 2> <..>')
    exit(1)

for i in range(1,len(sys.argv)):
    bam_file = sys.argv[i]

    p = subprocess.Popen(\
            ['samtools', 'view', '-H', bam_file], \
            stdout=subprocess.PIPE)

    f = open(bam_file + '.genome', 'w')

    for l in p.communicate()[0].split('\n'):
        if '@SQ' in l:
            A = l.rstrip().split('\t')
            name = ''
            size = ''
            for a in A:
                if 'SN' in a:
                    name = a.split(':')[1]
                if 'LN' in a:
                    size = a.split(':')[1]
            f.write(name + '\t' + size + '\n')
    f.close()

    p = subprocess.Popen(\
            'bedtools genomecov ' + \
                ' -ibam ' +  bam_file + \
                ' -g ' + bam_file + '.genome' + \
                ' -bga ' + \
                ' > ' + bam_file + '.coverage',shell=True)
    os.waitpid(p.pid, 0)

for i in range(1,len(sys.argv)):
    bam_file = sys.argv[i]
    genome_file = bam_file + '.genome'
    f = open(genome_file,'r')
    total_len = 0.0
    for l in f:
        total_len += float(l.rstrip().split('\t')[1])
    f.close()

    coverage_file = bam_file + '.coverage'
    f = open(coverage_file, 'r')
    C = []
    W = []
    for l in f:
        a = l.rstrip().split('\t')
        if float(a[3]) > 0:
            C.append(float(a[3]))
            W.append((float(a[2])-float(a[1]))/total_len)
    min_c = min(C)
    max_c = max(C)
    mean_c = np.average(C,weights=W)
    stdev_c = np.std(C)
    print(coverage_file + \
            '\tmin:' + str(min_c) + \
            '\tmax:' + str(max_c) + \
            '\tmean(non-zero):' + str(mean_c))
    f.close()