1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
|
#!/usr/bin/env python3
import sys
# Debug switch; nothing in the visible part of this script reads it.
debug = True
def vcfReplaceFirstAnn(line):
    """Print a VCF data line keeping only the first 'ANN' / 'EFF' annotation.

    The line is split on tabs and its 8th column (INFO) is split on ';'.
    Every INFO entry that starts with 'ANN=' or 'EFF=' is truncated to the
    text before its first comma; all other entries are left untouched. The
    resulting line is written to STDOUT with print().

    Lines with fewer than eight tab-separated columns (for example blank
    lines) have no INFO column and are printed unchanged instead of raising
    IndexError.

    Args:
        line: One VCF record, without its trailing newline.

    Returns:
        None. The (possibly rewritten) line is printed to STDOUT.
    """
    fields = line.split('\t')

    # No INFO column to inspect: echo the line as-is rather than crash.
    if len(fields) < 8:
        print(line)
        return

    def first_annotation_only(info):
        # 'ANN=' and 'EFF=' are both 4 characters long, so the value starts
        # at offset 4. Replace the field by its first annotation only.
        if info.startswith(('ANN=', 'EFF=')):
            return info[:4] + info[4:].split(',', 1)[0]
        return info

    # Splitting and re-joining on the same separators reproduces an
    # unmodified line exactly, so no "changed" flag is needed.
    fields[7] = ';'.join(
        first_annotation_only(info) for info in fields[7].split(';')
    )
    print('\t'.join(fields))
#------------------------------------------------------------------------------
# Main
#------------------------------------------------------------------------------
# Read VCF from STDIN, one record per line
for raw in sys.stdin:
    record = raw.rstrip()
    if not record.startswith('#'):
        # Data line: keep only the first ANN / EFF annotation
        vcfReplaceFirstAnn(record)
        continue
    # Header line: pass through unchanged
    print(record)
|