File: compare_dexseq_results.pl

package info (click to toggle)
trinityrnaseq 2.15.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 468,004 kB
  • sloc: perl: 49,905; cpp: 17,993; java: 12,489; python: 3,282; sh: 1,989; ansic: 985; makefile: 717; xml: 62
file content (82 lines) | stat: -rwxr-xr-x 2,126 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env perl

use strict;
use warnings;

my $usage = "usage: $0 genome.dexseq  superT.dexseq\n\n";

my $genome_dexseq = $ARGV[0] or die $usage;
my $superT_dexseq = $ARGV[1] or die $usage;


main: {

    my %genome_signif = &parse_signif_scores($genome_dexseq);
    my %superT_signif = &parse_signif_scores($superT_dexseq);

    
    print join("\t", "gene", "g_padj", "g_log2fc", "s_padj", "s_log2fc") . "\n";
    foreach my $gene (keys %genome_signif) {
        my $genome_best = $genome_signif{$gene};
        my $superT_best = $superT_signif{$gene};

        my ($genome_best_padj, $genome_best_log2fold) = ("NA", "NA");
        my ($superT_best_padj, $superT_best_log2fold) = ("NA", "NA");

        if ($genome_best) {
            ($genome_best_padj, $genome_best_log2fold) = ($genome_best->{padj}, $genome_best->{log2fold});
        }

        if ($superT_best) {
            ($superT_best_padj, $superT_best_log2fold) = ($superT_best->{padj}, $superT_best->{log2fold});
        }
                
        print join("\t", $gene, 
                   $genome_best_padj, $genome_best_log2fold,
                   $superT_best_padj, $superT_best_log2fold) . "\n";
                           
    }

    exit(0);
}
 

####
sub parse_signif_scores {
    my ($results_file) = @_;

    my %gene_to_scores;
    
    open(my $fh, $results_file) or die $!;
    my $header = <$fh>;
    while(<$fh>) {
        chomp;
        my @x = split(/\t/);
        my $gene = $x[1];
        my $log2fold = $x[9];
        my $padj = $x[6];

        if ($padj eq "NA") { next; }
        
        push (@{$gene_to_scores{$gene}}, { log2fold => $log2fold,
                                           padj => $padj,
              } );

    }
    close $fh;

    my %gene_to_best;
    foreach my $gene (keys %gene_to_scores) {
        my @scores = @{$gene_to_scores{$gene}};

        @scores = sort {$a->{padj}<=>$b->{padj}
                        ||
                            abs($b->{log2fold}) <=> abs($a->{log2fold}) } @scores;

        my $best = shift @scores;
        $gene_to_best{$gene} = $best;
    }

    return(%gene_to_best);
}