File: vcfEffOnePerLine.pl

package info (click to toggle)
snpeff 5.2.f%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 701,384 kB
  • sloc: java: 62,547; perl: 2,279; sh: 1,185; python: 744; xml: 507; makefile: 50
file content (67 lines) | stat: -rwxr-xr-x 1,614 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/perl

#-------------------------------------------------------------------------------
#
# Read a VCF file (via STDIN) and split the EFF (or ANN) field of the INFO
# column into many lines, leaving one line per effect.
#
# Note: In lines having multiple effects, all other information will be
#       repeated. Only the 'EFF' (or 'ANN') field will change.
#
#															Pablo Cingolani 2012
#-------------------------------------------------------------------------------

use strict;
use warnings;

# Zero-based index of the INFO column within a tab-separated VCF record.
my $INFO_FIELD_NUM = 7;

#-------------------------------------------------------------------------------
# expandEffectLine( $line )
#
# Expand one non-header VCF record into one record per effect.
#
# Parameter:
#   $line : the record text; every "\r" and "\n" character is removed first.
#
# Returns:
#   A list of output records WITHOUT line terminators:
#     - If the INFO column has no 'EFF=' / 'ANN=' entry, or that entry holds at
#       most one comma-separated effect, the (terminator-stripped) input is
#       returned unchanged as a single element.
#     - Otherwise one record per effect is returned. All columns are repeated;
#       in the INFO column the remaining entries come first (original order)
#       and the single-effect 'EFF=' / 'ANN=' entry is appended last.
#
# If several 'EFF=' / 'ANN=' entries are present, the last one is the one that
# gets expanded; the earlier ones are kept verbatim among the other INFO
# entries instead of being silently discarded.
#-------------------------------------------------------------------------------
sub expandEffectLine {
	my ($line) = @_;
	$line =~ tr/\n\r//d;

	# A negative limit keeps trailing empty columns, so the expanded records
	# have exactly the same number of columns as the input record.
	my @cols = split /\t/, $line, -1;

	# Records with fewer columns than expected simply have no INFO entries.
	my $info  = $cols[$INFO_FIELD_NUM];
	my @infos = defined($info) ? split( /;/, $info ) : ();

	# Locate the annotation entry to expand (last one wins).
	my $annIdx = -1;
	foreach my $i ( 0 .. $#infos ) {
		$annIdx = $i if $infos[$i] =~ /^(?:EFF|ANN)=/;
	}
	return ($line) if $annIdx < 0;

	my ( $fieldName, $value ) = $infos[$annIdx] =~ /^(EFF|ANN)=(.*)/;
	my @effs = split /,/, $value;

	# Zero or one effect: nothing to split, show the record as it is.
	return ($line) if @effs <= 1;

	# Re-assemble every INFO entry except the one being expanded.
	my $infStr = '';
	foreach my $i ( 0 .. $#infos ) {
		next if $i == $annIdx;
		$infStr .= ( $infStr eq '' ? '' : ';' ) . $infos[$i];
	}
	my $infoPrefix = ( $infStr eq '' ) ? '' : "$infStr;";

	# Columns before and after INFO are copied unchanged into every record.
	my $pre  = join "\t", @cols[ 0 .. $INFO_FIELD_NUM - 1 ];
	my $post = join '', map { "\t$_" } @cols[ $INFO_FIELD_NUM + 1 .. $#cols ];

	return map { "$pre\t$infoPrefix$fieldName=$_$post" } @effs;
}

# Main loop: header lines are echoed untouched, data lines are expanded.
while ( my $line = <STDIN> ) {
	if ( $line =~ /^#/ ) {
		print $line;
	}
	else {
		print "$_\n" foreach expandEffectLine($line);
	}
}