File: venn_pairwise_summaries.pl

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (123 lines) | stat: -rwxr-xr-x 2,970 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env perl

use strict;
use warnings;

use Carp;
use Getopt::Long qw(:config no_ignore_case bundling pass_through);


my $usage = <<__EOUSAGE__;

#############################################################################################

usage: $0 [--top_ranks <int>] pw1 pw2 ...

##############################################################################################
#
# --top_ranks <int>          restrict to number of top ranked entries per pairwise condition.
#
# --minFC <float>            minimum fold change in expression
#
##############################################################################################


__EOUSAGE__

    ;



my $help_flag;
my $top_ranks = -1;
my $minFC = 0;

&GetOptions ( 'h' => \$help_flag,

              'top_ranks=i' => \$top_ranks,
              'minFC=f' => \$minFC,


              );


my $minLogFC = 0;
if ($minFC) {
    $minLogFC = log($minFC)/log(2);
}


my @summaries = @ARGV or die $usage;

unless (scalar @summaries > 1) {
    die $usage;
}




main: {
    
    
    if ($top_ranks > 0) {
        print STDERR "NOTE: only $top_ranks top ranks from each will be reported.\n\n";
    }
    
    my %gene_samples_to_methods;
    my %gene_samples_to_ranks;

    foreach my $summary (@summaries) {
        
        my %pw_counter;
        open (my $fh, $summary) or die $!;
        while (<$fh>) {
            chomp;
            my ($acc, $sampleA, $sampleB, $exprA, $exprB, $logFC, $pval) = split(/\t/);
            
            if (abs($logFC) < $minLogFC) { next; }
            
            my $feature_sample_key = join("$;", $acc, sort ($sampleA, $sampleB));
            
            my $sample_key = join("$;", sort($sampleA, $sampleB));
            
            $pw_counter{$sample_key}++;
            #if ($top_ranks > 0 && $pw_counter{$sample_key} > $top_ranks) { next; }
            
            #print STDERR "$pw_counter{$sample_key}\n";
            
            push (@{$gene_samples_to_methods{$feature_sample_key}}, $summary);
            
            $gene_samples_to_ranks{$feature_sample_key}->{$summary} = $pw_counter{$sample_key};
            
        }
        close $fh;
    }

    ## output
    
    foreach my $feature_samples (keys %gene_samples_to_methods) {

        my @methods = sort (@{$gene_samples_to_methods{$feature_samples}});
        my @rankings;
        my $report_entry = ($top_ranks > 0) ? 0 : 1;
        foreach my $method (@methods) {
            my $rank = $gene_samples_to_ranks{$feature_samples}->{$method};
            push (@rankings, $rank);

            if ($top_ranks > 0 && $rank <= $top_ranks) {
                $report_entry = 1;
            }
            
        }
        
        if ($report_entry) {
            my ($feature, $sampleA, $sampleB) = split(/$;/, $feature_samples);
            
            print join("\t", $feature, $sampleA, $sampleB, join(",", @methods), join(",", @rankings) ) . "\n";
        }
    }
    
    exit(0);
}