File: examine_strand_specificity.pl

package info (click to toggle)
trinityrnaseq 2.11.0+dfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (91 lines) | stat: -rwxr-xr-x 2,754 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env perl

use strict;
use warnings;
use lib ("/usr/lib/trinityrnaseq/PerlLib");
use SAM_reader;
use SAM_entry;
use Process_cmd;

# Help text emitted (via die) when the mandatory BAM argument is missing.
my $usage = "\n\n\tusage: $0 transcript_aligned.bam [out_prefix='ss_analysis']\n\n";

# Positional arguments: the BAM file is required, the output prefix is
# optional and falls back to 'ss_analysis'.
my ($bam_file, $out_prefix) = @ARGV;
die $usage unless $bam_file;
$out_prefix ||= "ss_analysis";

# Tally, per transcript, how many counted reads align in '+' vs. '-'
# orientation, write the table to "$out_prefix.dat" (highest read count
# first), then pass that file to the strand-specificity plotting Rscript.
#
# Reads flagged as paired are counted only when they are the first read of
# a proper pair; reads not flagged as paired are always counted.
#
# Dies if the output file cannot be opened or closed.
main: {

    my %transcript_to_orients;

    my $sam_reader = SAM_reader->new($bam_file);
    print STDERR "-parsing file: $bam_file\n";
    while (my $sam_entry = $sam_reader->get_next()) {

        my $trans_name = $sam_entry->get_scaffold_name();
        my $orient = $sam_entry->get_query_strand();

        # paired data: only the first read of a proper pair is tallied, so
        # each fragment contributes at most one orientation count.
        if ($sam_entry->is_paired()) {
            next unless ($sam_entry->is_proper_pair() && $sam_entry->is_first_in_pair());
        }

        $transcript_to_orients{$trans_name}->{$orient}++;
    }
    print STDERR "-done parsing file, examining orientations of reads.\n";

    ## sum them up.
    # Each per-transcript hash doubles as the output record: besides the
    # '+' / '-' counters it gets 'transcript' and 'total_reads' entries.
    foreach my $transcript (keys %transcript_to_orients) {

        my $counts = $transcript_to_orients{$transcript};

        my $orient_plus = $counts->{'+'} || 0;
        my $orient_minus = $counts->{'-'} || 0;

        $counts->{'transcript'} = $transcript;
        $counts->{'total_reads'} = $orient_plus + $orient_minus;
    }

    ####
    # Most-covered transcripts first; ties are broken by transcript name so
    # the output order does not depend on hash ordering.
    my @structs = sort {
        $b->{'total_reads'} <=> $a->{'total_reads'}
            || $a->{'transcript'} cmp $b->{'transcript'}
    } values %transcript_to_orients;

    my $dat_file = "$out_prefix.dat";

    # three-arg open: the user-supplied prefix can never be taken as a mode.
    open(my $ofh, '>', $dat_file) or die "Error, cannot write to $dat_file: $!";

    # header
    print $ofh join("\t", "#transcript", "plus_strand_1stReads", "minus_strand_1stReads", "total_reads", "diff_ratio") . "\n";

    # Plain iteration: the list must not be shifted while foreach walks it,
    # otherwise only about half of the records are ever written.
    foreach my $struct (@structs) {

        my $plus = $struct->{'+'} || 0;
        my $minus = $struct->{'-'} || 0;
        my $total_reads = $struct->{'total_reads'};
        my $transcript = $struct->{'transcript'};

        # No '+' or '-' counts means the ratio is undefined; skip the row
        # instead of dying on a division by zero.
        unless ($total_reads) {
            print STDERR "-warning: no +/- oriented reads counted for $transcript, skipping.\n";
            next;
        }

        my $diff_proportion = sprintf("%.3f", ($plus - $minus) / $total_reads);

        print $ofh join("\t", $transcript, $plus, $minus, $total_reads, $diff_proportion) . "\n";
    }

    # buffered write errors only surface at close, so check it.
    close $ofh or die "Error, failed closing $dat_file: $!";


    ## plot it.
    # NOTE(review): the output path is interpolated unquoted into the command
    # string; presumably process_cmd() runs it through a shell, so prefixes
    # containing spaces or shell metacharacters would break - confirm.
    my $cmd  = "/usr/lib/trinityrnaseq/util/misc/plot_strand_specificity_dist_by_quantile.Rscript $dat_file";
    process_cmd($cmd);


    exit(0);
}