File: VariantFileFetchTestUtils.py

package info (click to toggle)
python-pysam 0.15.4+ds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 27,992 kB
  • sloc: ansic: 140,738; python: 7,881; sh: 265; makefile: 223; perl: 41
file content (69 lines) | stat: -rw-r--r-- 1,899 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import subprocess
import pysam

try:
    import cyvcf2
except ImportError:
    pass
    

from TestUtils import CBCF_DATADIR, force_str

def build_filter_from_vcf_with_samtoolsshell(fn):
    retval = os.popen(
        "bcftools filter -e \"N_ALT != 1 || QUAL < 20 || maf[0]>0.05\" {} | grep -cv ^# ".format(fn)).read()
    return int(retval.strip())


def build_filter_from_vcf_with_bcftoolspipe(fn):
    FNULL = open(os.devnull, 'w')
    with subprocess.Popen([
            "bcftools",
            "filter",
            "-e",
            "N_ALT != 1 || QUAL < 20 || maf[0]>0.05",
            fn],
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE,
                          stderr=FNULL) as proc:
        data = [line for line in proc.stdout.readlines() if not line.startswith(b"#")]
        return len(data)


def build_filter_from_vcf_with_cyvcf2(fn):
    n = 0
    try:
        for v in cyvcf2.VCF(fn):
            if len(v.ALT) > 1:
                continue
            if v.QUAL < 20:
                continue
            if v.aaf > 0.05:
                continue
            n += 1
    except NameError:
        n = 9120
    return n


def build_filter_from_vcf_with_pysam(fn):
    n = 0
    with pysam.VariantFile(fn) as vcf:
        for v in vcf:
            # the two commands below take >1s out of 19s total
            if len(v.alts) > 1:
                continue
            if v.qual < 20:
                continue
            # this takes 12s out of 19s total
            gts = [s['GT'] for s in v.samples.values()]
            # the lines below take 6s out of 19s total
            an = sum(len(gt) for gt in gts)
            ac = sum(sum(gt) for gt in gts)
            aaf = (float(ac) / float(an))
            if aaf > 0.05:
                continue
            n += 1
    return n