1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
#!/usr/bin/perl
#------------------------------------------------------------------------------
#
# Mark snps as X1, X2 or 'Both'
#
#------------------------------------------------------------------------------
use strict;
my($debug) = 0;
#------------------------------------------------------------------------------
# Read a file and index lines by SNP
#------------------------------------------------------------------------------
sub readSnps($) {
my($file) = (@_);
my($l, %snps);
open SNP, $file || die "Cannot open file '$file'\n";
while( $l = <SNP> ) {
my($chr, $pos, $ref, $var) = split /\t/, $l;
my($snp) = "$chr:$pos\_$ref/$var";
$snps{$snp} .= $l;
}
close SNP;
return %snps;
}
#------------------------------------------------------------------------------
# Print SNP info and quals
#------------------------------------------------------------------------------
sub printLine($$$$) {
my($snp, $lines, $quals, $q) = (@_);
my($line, @lines);
(@lines) = split '\n', $lines;
foreach $line ( @lines ) {
my($l) = replaceSnpQ($line, $q);
print "$l\t$quals\n";
}
}
#------------------------------------------------------------------------------
# Parse snp quality parameter
#------------------------------------------------------------------------------
sub parseSnpQ($) {
my($l) = @_;
my(@t);
(@t) = split /\t/,$l;
return $t[6];
}
#------------------------------------------------------------------------------
# Replace a quality
#------------------------------------------------------------------------------
sub replaceSnpQ($$) {
my($line, $q) = @_;
my(@t);
(@t) = split /\t/, $line;
$t[1] = $q;
return join("\t", @t);
}
#------------------------------------------------------------------------------
# Main
#------------------------------------------------------------------------------
# Read arguments
my(@file);
(@file) = @ARGV;
if( $#file <= 0 ) { die "Usage: ./joinSnpEff.pl tag1 file1 tag2 file2 ... tagN fileN\n"; }
# Parse arguments
print STDERR "Reading files:\n";
my($i, $j, $file, $tag, $snp, @snpsAll, %snps, @tags);
for( $i=0 , $j=0 ; $i < $#ARGV ; $i+=2, $j++ ) {
# Read tag
$tags[$j] = $tag = $ARGV[$i];
# Read file
$file = $ARGV[$i+1];
if( $file eq '' ) { die "Missing file for tag '$tag'\n"; }
print STDERR "\tTags[$j]: $tag\t'$file'\n";
%snps = readSnps($file);
# Add all snps
foreach $snp ( keys %snps ) { $snpsAll[$j]->{$snp} = $snps{$snp}; }
}
#---
# Print SNPS
#---
my($snp, %done, %snpsi);
my($j, $jj);
$i = 0;
print STDERR "Joining SNP from all files\n";
for( $i=0 ; $i <= $#tags ; $i++ ) { # For all tags
print "TAG:\ttags[$i] = '$tags[$i]'\n" if $debug;
my($uniq, $shared) = (0, 0);
%snpsi = %{$snpsAll[$i]};
foreach $snp (sort keys %snpsi) { # For all snps...
if( ! $done{$snp} ) { # Not done yet?
# Get qualities from all SNPs
my($quals, $qSum, $qCount) = ("", 0, 0);
my($all) = "ALL ";
for( $j=0, $jj=1 ; $j <= $#snpsAll ; $j++ , $jj++ ) {
if( exists $snpsAll[$j]{$snp} ) {
my($q) = parseSnpQ($snpsAll[$j]->{$snp});
$quals .= "$tags[$j]:$q ";
$qSum += $q;
$qCount++;
} else { $all = ""; }
}
if( $qCount <= 1 ) { $uniq++; } # Count unique SNPs for this file
else { $shared++; }
$done{$snp} = 1;
my($qAvg) = ( $qCount > 0 ? int($qSum/$qCount) : 0);
printLine($snp, $snpsi{$snp}, "$all $quals", $qAvg);
} else { $shared++; }
}
print STDERR "\tTags[$i]: $tags[$i]\tUnique / Shared snps: $uniq / $shared\n";
}
|