File: remove_non_biallelic.py

package info (click to toggle)
discosnp 1%3A2.6.2-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,656 kB
  • sloc: python: 5,893; sh: 2,966; cpp: 2,692; makefile: 14
file content (39 lines) | stat: -rw-r--r-- 1,148 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import sys
import gzip

if len(sys.argv)<2:
    print ("This tool removes from discoSnp sorted VCF the locus which are tri-allelic or more")
    print ("python3 remove_non_diploids.py \".vcf from discoSnp\" ")
    sys.exit()


if "gz" in sys.argv[1]:
    coherent_file=gzip.open(sys.argv[1],"r")
else: 
    coherent_file=open(sys.argv[1],"r")

previous_line=""
previous_chr=""
previous_pos=""
previous_triploid=False

while True:    
    line = coherent_file.readline().rstrip()
    if not line: break
    if line[0]=='#': 
        print (line)
        continue
#    SNP_higher_path_3       196     3       C       G       .       .       Ty=SNP;Rk=1;UL=86;UR=261;CL=166;CR=761;Genome=.;Sd=.    GT:DP:PL:AD:HQ  0/0:124:10,378,2484:124,0:0,0   1/1:134:2684,408,10:0,134:0,0
    splitted_line = line.split()
    chr=splitted_line[0]
    pos=splitted_line[1]
    if chr==previous_chr and pos==previous_pos:
        previous_triploid=True
        continue
    else:
        if not previous_triploid and len(previous_line)>0:
            print (previous_line)
    previous_chr=chr
    previous_pos=pos
    previous_triploid=False
    previous_line=line