File: score_CDS_liklihood_all_6_frames.pl

package info (click to toggle)
transdecoder 3.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 11,356 kB
  • ctags: 274
  • sloc: perl: 5,454; sh: 81; makefile: 51
file content (90 lines) | stat: -rwxr-xr-x 1,895 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env perl

use strict;
use warnings;

use FindBin;
use lib ("$FindBin::Bin/../PerlLib");
use Fasta_reader;
use Nuc_translator;

my $usage = "usage: $0 CDS hexamerScores\n\n";

my $cds_file = $ARGV[0] or die $usage;
my $hexamer_scores_file = $ARGV[1] or die $usage;


my %scores = &parse_hexamer_scores($hexamer_scores_file);

main: {
		
	my $fasta_reader = new Fasta_reader($cds_file);
	while (my $seq_obj = $fasta_reader->next()) {
		
		my $accession = $seq_obj->get_accession();
		my $sequence = uc $seq_obj->get_sequence();

		my $score1 = &score_CDS($sequence);				
		my $score2 = &score_CDS(substr($sequence, 1));
		my $score3 = &score_CDS(substr($sequence, 2));

		my $rev_seq = &reverse_complement($sequence);
		
		my $score4 = &score_CDS($rev_seq);
		my $score5 = &score_CDS(substr($rev_seq, 1));
		my $score6 = &score_CDS(substr($rev_seq, 2));
		
		printf("$accession\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n",
			   length($sequence), 
               $score1, $score2, $score3,
			   $score4, $score5, $score6);
		
	}
	
	exit(0);

}

####
sub score_CDS {
	my ($sequence) = @_;
	
	my $seq_length = length($sequence);
	
	if ($seq_length < 5) {
		return(0);
	}

	## init score to first pentamer
	my $pentamer = substr($sequence, 0, 5);
	my $framed_pentamer = "${pentamer}-0";
	my $score = $scores{$framed_pentamer} || 0;
	
	for (my $i = 5; $i <= $seq_length - 6; $i++) {
		my $hexamer = substr($sequence, $i, 6);
		my $frame = $i % 3;
		my $framed_hexamer = "${hexamer}-${frame}";
		my $hex_score = $scores{$framed_hexamer} || 0;
		$score += $hex_score;
	}
	
	return ($score);
}


####
sub parse_hexamer_scores {
	my ($hexamer_scores_file) = @_;

	my %scores;
	open (my $fh, $hexamer_scores_file) or die "Error, cannot open $hexamer_scores_file";
	while (<$fh>) {
		chomp;
		my ($token, $score) = split (/\t/);
		$scores{$token} = $score;
	}
	close $fh;

	return (%scores);
}