File: compute_base_probs.pl

package info (click to toggle)
transdecoder 3.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 11,356 kB
  • ctags: 274
  • sloc: perl: 5,454; sh: 81; makefile: 51
file content (67 lines) | stat: -rwxr-xr-x 1,315 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env perl

use strict;
use warnings;

use FindBin;
use lib ("$FindBin::Bin/../PerlLib");
use Fasta_reader;
use Nuc_translator;

my $usage = "usage: $0 transcripts_file [top_strand_only]\n\n";

my $transcripts_file = $ARGV[0] or die $usage;
my $top_strand_only_flag = $ARGV[1] || 0;

main: {

    my %base_counter;

    my $fasta_reader = new Fasta_reader($transcripts_file);
    while (my $seq_obj = $fasta_reader->next()) {

        my $sequence = uc $seq_obj->get_sequence();

        &count_bases($sequence, \%base_counter);

        unless ($top_strand_only_flag) {
            $sequence = &reverse_complement($sequence);
            &count_bases($sequence, \%base_counter);
        }
        
    }

    
    my $sum = 0;
    foreach my $count (values %base_counter) {
        $sum += $count;
    }

    foreach my $base (sort keys %base_counter) {
        
        my $count = $base_counter{$base};

        my $ratio = $count/$sum;
        
        print join("\t", $base, $count, sprintf("%.3f", $ratio)) . "\n";
    }
    

    exit(0);
}

####
sub count_bases {
    my ($sequence, $base_counter_href) = @_;

    my @chars = split(//, $sequence);
    
    foreach my $char (@chars) {
        if ($char =~ /[GATC]/) {
            $base_counter_href->{$char}++;
        }
    }
    
    return;
}