1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
#!/usr/bin/env python
import sys
import numpy as np
import glob
from optparse import OptionParser
import l_bp
def _write_line(line):
    """Write one header/record line to stdout, terminated by one newline."""
    # The header strings built in main() already end in "\n"; only append a
    # newline when a line arrives without one so nothing is doubled or glued.
    sys.stdout.write(line if line.endswith("\n") else line + "\n")


def main():
    """Merge the VCF files named on the command line into one sorted stream.

    Every path in ``sys.argv[1:]`` is passed to ``l_bp.parse_vcf`` together
    with the shared ``vcf_lines`` and ``vcf_headers`` lists; the sample names
    it returns are recorded as ``##SAMPLE`` header lines.  The SNAME/ALG INFO
    declarations and the column-header line are then added, headers are
    ordered with ``l_bp.header_line_cmp``, records with ``l_bp.vcf_line_cmp``,
    and everything is written to standard output (headers first).

    Returns:
        None, so ``sys.exit(main())`` exits with status 0 on success.

    Raises:
        SystemExit: with the usage text when no VCF file is given.
    """
    # Imported locally so this function does not rely on a new file-level
    # import being present.
    import functools

    usage = """%prog <VCF file 1> <VCF file 2> ... <VCF file N>
l_sort
Author: Ryan Layer, Colby Chiang, & Ira Hall
Description: sort N VCF files into a single file
Version: 0.01
"""
    if len(sys.argv) < 2:
        # sys.exit rather than the bare exit() builtin: the latter is only
        # injected by the site module and is missing under -S / frozen builds.
        sys.exit(usage)

    vcf_file_names = sys.argv[1:]
    vcf_lines = []
    vcf_headers = []

    for vcf_file_name in vcf_file_names:
        # NOTE(review): parse_vcf is defined in l_bp (not visible here); it is
        # presumably what fills vcf_lines / vcf_headers - confirm.
        samples = l_bp.parse_vcf(vcf_file_name, vcf_lines, vcf_headers)
        for sample in samples:
            vcf_headers.append("##SAMPLE=<ID=" + sample + ">\n")

    vcf_headers.append("##INFO=<ID=SNAME,Number=.,Type=String,"
                       "Description=\"Source sample name\">\n")
    vcf_headers.append("##INFO=<ID=ALG,Number=1,Type=String,"
                       "Description=\"Evidence PDF aggregation algorithm\">\n")
    vcf_headers.append("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\t"
                       "VARIOUS\n")

    # Python 3's list.sort() has no ``cmp`` argument; adapt the two-argument
    # comparison functions with functools.cmp_to_key instead.
    vcf_headers.sort(key=functools.cmp_to_key(l_bp.header_line_cmp))
    for header in vcf_headers:
        # Written verbatim: a ' ' separator would prefix every following
        # line with a stray space and corrupt the VCF.
        _write_line(header)

    vcf_lines.sort(key=functools.cmp_to_key(l_bp.vcf_line_cmp))
    # Records are emitted unmodified.  (A disabled experiment that padded the
    # '++:'/'--:' strand entry of SVTYPE=BND records used to live here.)
    for record in vcf_lines:
        _write_line(record)
# Script entry point: run main() only when executed directly (not when
# imported) and hand its return value to the interpreter as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|