File: tier_gene_trans_alignments.tiers_to_boxplot.pl

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (80 lines) | stat: -rwxr-xr-x 2,005 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env perl

use strict;
use warnings;
use File::Basename;

my $usage = "usage: $0 fileA.tiers [fileB.tiers ...]\n\n";

my @tier_files = @ARGV or die $usage;



main: {

    foreach my $tier_file (@tier_files) {
        
        print STDERR "// processing $tier_file\n";
        
        my @top_tier_per_gene = &get_top_tier_per_gene($tier_file);
        
        open (my $ofh, ">$tier_file.top_tiers") or die $!;
        print $ofh join("\n", @top_tier_per_gene) . "\n";
        close $ofh;
        
    }

    ## generate boxplot
    open (my $ofh, ">__tmp_tiers_boxplot.R") or die $!;
    my @name_tokens;
    foreach my $tier_file (@tier_files) {
        my $token = basename($tier_file);
        $token =~ s/\W/_/g;
        push (@name_tokens, $token);
        print $ofh "$token = read.table(\"$tier_file.top_tiers\", header=F);\n";
    }
    print $ofh "pdf(file=\"tiers.boxplot.pdf\");\n";
    print $ofh "boxplot(c(" . join(",", @name_tokens) . "), names=c(\"" . join("\",\"", @name_tokens) . "\"), outline=T, las=2);\n";
    print $ofh "dev.off();\n";
    close $ofh;
    
    system("R --no-save --no-restore --no-site-file --no-init-file -q < __tmp_tiers_boxplot.R");

    exit($?);
}

####
sub get_top_tier_per_gene {
    my ($tier_file) = @_;

    my %gene_to_top_tier;

    open (my $fh, $tier_file) or die $!;
    while (<$fh>) {
        #print;
        
        chomp;
        
        if (/^Tier\[(\d+)\]\s+(\S+)/) {
            my $tier_val = $1;
            my $gene_id = $2;
            if ( (! exists $gene_to_top_tier{$gene_id}) 
                 ||
                 $gene_to_top_tier{$gene_id} < $tier_val) {
                
                $gene_to_top_tier{$gene_id} = $tier_val;
                
                #print STDERR "$gene_id => $tier_val\t" . substr($_, 0, 100) . "\n";
            }
        }
        else {
            die "Error, cannot parse $_";
        }
        
    }
    close $fh;

    my @vals = values %gene_to_top_tier;

    return(@vals);
}