File: extract-sites.py

package info (click to toggle)
lumpy-sv 0.3.1%2Bdfsg-7
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid, trixie
  • size: 296,072 kB
  • sloc: cpp: 9,908; python: 1,768; sh: 1,384; makefile: 365; ansic: 322; perl: 58
file content (33 lines) | stat: -rw-r--r-- 739 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

import sys
import gzip
import collections

def xopen(f):
    """Open *f* for reading as text, transparently handling gzip.

    Plain ``open`` returns text, but ``gzip.open`` defaults to binary
    mode under Python 3 — which would hand ``bytes`` lines to the
    ``str`` operations (``rstrip``/``split("\\t")``) downstream — so
    request text mode explicitly for gzipped input.
    """
    if f.endswith(".gz"):
        return gzip.open(f, "rt")
    return open(f)

# Deduplicated variant sites keyed by (CHROM, POS, REF, ALT);
# OrderedDict preserves first-seen input order for reproducible output.
sites = collections.OrderedDict()
# Input VCF paths (plain or gzipped) from the command line.
files = sys.argv[1:]

def key(toks):
    """Return the dedup key (CHROM, POS, REF, ALT) for a split VCF record."""
    chrom, pos = toks[0], toks[1]
    ref, alt = toks[3], toks[4]
    return chrom, pos, ref, alt

def anynonref(gts):
    """Return True if any sample genotype carries a non-reference allele.

    Each element of *gts* is a VCF sample column whose first
    colon-separated subfield is the GT string.  The original prefix
    test ``startswith(("0/1", "1/1"))`` missed valid non-ref calls:
    phased genotypes (``0|1``, ``1|1``), reversed order (``1/0``) and
    multi-allelic calls (``1/2``).  Parse the GT field instead: any
    allele that is neither ``0`` (reference) nor ``.`` (missing) is
    non-reference.
    """
    for gt in gts:
        # GT is the first FORMAT subfield; "|" (phased) and "/"
        # (unphased) separate alleles identically for this test.
        alleles = gt.split(":", 1)[0].replace("|", "/").split("/")
        if any(a not in ("0", ".", "") for a in alleles):
            return True
    return False


header = []
for f in files:
    header = []
    for toks in (l.rstrip().split("\t") for l in xopen(f)):
        if toks[0][0] == "#":
            if toks[0] == "#CHROM":
                toks = toks[:8]
            header.append("\t".join(toks))
            continue
        if anynonref(toks[9:]):
            sites[key(toks)] = "\t".join(toks[:8])

print("\n".join(header))
for k, line in sites.items():
    print(line)