File: remove_non_covered_genotypes.py

package info (click to toggle)
discosnp 1%3A2.6.2-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,656 kB
  • sloc: python: 5,893; sh: 2,966; cpp: 2,692; makefile: 14
file content (40 lines) | stat: -rw-r--r-- 1,310 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import sys
import gzip

if len(sys.argv)<3:
    print ("This tool replaces discoSnp VCF genotypes with DP lower or equal to a threshold to \"./.\"")
    print ("python3 remove_non_covered_genotypes.py \".vcf from discoSnp\" \"DP threshold\"")
    sys.exit()


if "gz" in sys.argv[1]:
    coherent_file=gzip.open(sys.argv[1],"r")
else: 
    coherent_file=open(sys.argv[1],"r")
dp_threshold = float(sys.argv[2])

while True:
    
    
    line = coherent_file.readline().rstrip()
    if not line: break
    if line[0]=='#': 
        print (line)
        continue
    
#    SNP_higher_path_3       196     3       C       G       .       .       Ty=SNP;Rk=1;UL=86;UR=261;CL=166;CR=761;Genome=.;Sd=.    GT:DP:PL:AD:HQ  0/0:124:10,378,2484:124,0:0,0   1/1:134:2684,408,10:0,134:0,0
    splitted_line = line.split()
    
    toprint=""
    for i in range(9):
        toprint+=splitted_line[i]+'\t'
    for i in range(9,len(splitted_line)):
        splitted_geno = splitted_line[i].split(':')
        if int(splitted_geno[1])<=dp_threshold: toprint+= "./.:"
        else: toprint+= splitted_geno[0]+":"
        
        for j in range(1,len(splitted_geno)):
            toprint+= splitted_geno[j]
            if j<len(splitted_geno)-1: toprint+= ":"
        if i<len(splitted_line): toprint+='\t'
    print (toprint)