File: test-anno.py

package info (click to toggle)
vcfanno 0.3.5%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 14,112 kB
  • sloc: python: 335; sh: 259; makefile: 42
file content (32 lines) | stat: -rw-r--r-- 1,068 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""
If we annotate ExAC with ExAC, we can test that all variants are annotated with
themselves
"""

import toolshed as ts
from hts import Tbx
import sys

tbx = Tbx(sys.argv[1])
fh = ts.nopen(1)

fields = ["AC_Adj", "AN_Adj", "AC_AFR", "AN_AFR", "AC_AMR", "AN_AMR", "AN_EAS",
        "AC_EAS", "AN_FIN", "AN_NFE", "AN_OTH", "AN_SAS", "AC_FIN", "AC_NFE",
        "AC_OTH", "AC_SAS"]
for toks in ts.reader(fh, header=False):
    if toks[0] == "#CHROM": break
toks[0] = 'CHROM'

for d in ts.reader(fh, header=toks):

    info = {x[0]: x[1] for x in (v.split("=") for v in d['INFO'].split(";") if '=' in v)}

    for f in fields:
        # if it's not the same, it overlapped many sites.
        if info[f] != info["exac" + f]:
            assert info[f] in info["exac" + f].split("|"), (d, f, info[f], info["exac" + f])
            r = list(tbx("%s:%d-%d" % (d['CHROM'], int(d['POS']) - 1, int(d['POS']) + len(d['REF']) - 1)))
            if len(r) < 2:
                print (d['CHROM'], d['POS'], d['REF'], d['ALT'], f, info[f], info["exac" + f])
                break