1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
|
#!/usr/bin/env perl
use strict;
use warnings;
my $usage = "usage: $0 genome.dexseq superT.dexseq\n\n";
my $genome_dexseq = $ARGV[0] or die $usage;
my $superT_dexseq = $ARGV[1] or die $usage;
main: {
my %genome_signif = &parse_signif_scores($genome_dexseq);
my %superT_signif = &parse_signif_scores($superT_dexseq);
print join("\t", "gene", "g_padj", "g_log2fc", "s_padj", "s_log2fc") . "\n";
foreach my $gene (keys %genome_signif) {
my $genome_best = $genome_signif{$gene};
my $superT_best = $superT_signif{$gene};
my ($genome_best_padj, $genome_best_log2fold) = ("NA", "NA");
my ($superT_best_padj, $superT_best_log2fold) = ("NA", "NA");
if ($genome_best) {
($genome_best_padj, $genome_best_log2fold) = ($genome_best->{padj}, $genome_best->{log2fold});
}
if ($superT_best) {
($superT_best_padj, $superT_best_log2fold) = ($superT_best->{padj}, $superT_best->{log2fold});
}
print join("\t", $gene,
$genome_best_padj, $genome_best_log2fold,
$superT_best_padj, $superT_best_log2fold) . "\n";
}
exit(0);
}
####
sub parse_signif_scores {
my ($results_file) = @_;
my %gene_to_scores;
open(my $fh, $results_file) or die $!;
my $header = <$fh>;
while(<$fh>) {
chomp;
my @x = split(/\t/);
my $gene = $x[1];
my $log2fold = $x[9];
my $padj = $x[6];
if ($padj eq "NA") { next; }
push (@{$gene_to_scores{$gene}}, { log2fold => $log2fold,
padj => $padj,
} );
}
close $fh;
my %gene_to_best;
foreach my $gene (keys %gene_to_scores) {
my @scores = @{$gene_to_scores{$gene}};
@scores = sort {$a->{padj}<=>$b->{padj}
||
abs($b->{log2fold}) <=> abs($a->{log2fold}) } @scores;
my $best = shift @scores;
$gene_to_best{$gene} = $best;
}
return(%gene_to_best);
}
|