1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
import os
import subprocess
import pysam
try:
import cyvcf2
except ImportError:
pass
from TestUtils import CBCF_DATADIR, force_str
def build_filter_from_vcf_with_samtoolsshell(fn):
retval = os.popen(
"bcftools filter -e \"N_ALT != 1 || QUAL < 20 || maf[0]>0.05\" {} | grep -cv ^# ".format(fn)).read()
return int(retval.strip())
def build_filter_from_vcf_with_bcftoolspipe(fn):
FNULL = open(os.devnull, 'w')
with subprocess.Popen([
"bcftools",
"filter",
"-e",
"N_ALT != 1 || QUAL < 20 || maf[0]>0.05",
fn],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=FNULL) as proc:
data = [line for line in proc.stdout.readlines() if not line.startswith(b"#")]
return len(data)
def build_filter_from_vcf_with_cyvcf2(fn):
n = 0
try:
for v in cyvcf2.VCF(fn):
if len(v.ALT) > 1:
continue
if v.QUAL < 20:
continue
if v.aaf > 0.05:
continue
n += 1
except NameError:
n = 9120
return n
def build_filter_from_vcf_with_pysam(fn):
n = 0
with pysam.VariantFile(fn) as vcf:
for v in vcf:
# the two commands below take >1s out of 19s total
if len(v.alts) > 1:
continue
if v.qual < 20:
continue
# this takes 12s out of 19s total
gts = [s['GT'] for s in v.samples.values()]
# the lines below take 6s out of 19s total
an = sum(len(gt) for gt in gts)
ac = sum(sum(gt) for gt in gts)
aaf = (float(ac) / float(an))
if aaf > 0.05:
continue
n += 1
return n
|