File: vcfAnnFirst.py

package info (click to toggle)
snpeff 5.2.f+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 701,384 kB
  • sloc: java: 62,547; perl: 2,279; sh: 1,185; python: 744; xml: 507; makefile: 50
file content (48 lines) | stat: -rwxr-xr-x 979 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3

import sys

# Debug flag; nothing in the visible part of this script reads it.
debug = True

def vcfReplaceFirstAnn(line):
	"""Print a VCF record, keeping only the first annotation of 'ANN'/'EFF'.

	The line is split on tabs and the 8th column (INFO) is split on ';'.
	Every INFO entry starting with 'ANN=' or 'EFF=' is truncated to its
	first comma-separated annotation. The resulting record is written to
	standard output; nothing is returned.

	Lines with fewer than 8 tab-separated columns (e.g. blank lines) have
	no INFO column and are printed unchanged instead of raising IndexError.

	Args:
		line: One VCF data line, without its trailing newline.
	"""
	f = line.split('\t')

	# No INFO column: nothing to rewrite, pass the line through as is
	if len(f) < 8:
		print(line)
		return

	infos = f[7].split(';')
	infosChanged = False

	# For every INFO field
	for i, info in enumerate(infos):
		if info.startswith(('ANN=', 'EFF=')):
			# Both prefixes ('ANN=' / 'EFF=') are exactly 4 characters long
			name = info[:4]
			anns = info[4:]

			# Replace field by first annotation only
			infos[i] = name + anns.split(',')[0]
			infosChanged = True

	if infosChanged:
		# Show new fields
		f[7] = ';'.join(infos)
		print('\t'.join(f))
	else:
		# No change, just show original line
		print(line)

#------------------------------------------------------------------------------
# Main
#------------------------------------------------------------------------------

# Read VCF from STDIN
for line in sys.stdin:
	# Strip only the line terminator. A bare rstrip() would also remove
	# trailing tabs, which are column delimiters: a record whose last
	# column is empty would silently lose that column.
	line = line.rstrip('\r\n')

	if line.startswith('#'):
		# Show header
		print(line)
	else:
		vcfReplaceFirstAnn(line)