File: transdecoder_pep_to_false_fusion_finder.pl

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (67 lines) | stat: -rwxr-xr-x 1,500 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env perl

use strict;
use warnings;

## Just identifies and counts transcripts that are predicted to encode multiple ORFs

# of those that encode an ORF of min length, how many encode multiple ORFs of min length?

my $usage = "\n\nusage: $0 transdecoder.pep [min_prot_length=300]\n\n";

my $transdecoder_pep_file = $ARGV[0] or die $usage;
my $min_prot_length = $ARGV[1] || 300;



main: {

    # example transdecoder pep header
    # m.83 g.83  ORF g.83 m.83 type:complete len:773 (+) CUFF.100.1:5314-7632(+)
    
    my %acc_orf_counter;

    open (my $fh, $transdecoder_pep_file) or die $!;
    while (<$fh>) {
        chomp;
        if (/^>/) {
            /\s(\S+):\d+-\d+\([+-]\)/ or die "Error, cannot extract transcript accession from $_";
            my $trans_acc = $1;

            /\slen:(\d+)\s/ or die "Error, cannot extract len value from $_";
            my $len = $1;

            if ($len >= $min_prot_length) {
                $acc_orf_counter{$trans_acc}++;
            }
        }
    }
    close $fh;

    ## determine number of putative chimeras
    my $num_trans = 0;
    my $num_chims = 0;
    
    foreach my $acc (keys %acc_orf_counter) {

        $num_trans++;

        if ($acc_orf_counter{$acc} > 1) {
            
            $num_chims++;
        
        }

    }

    my $pct_chims = sprintf("%.2f", $num_chims/$num_trans*100);

    print "#trans\tchims\t\%chims\n";
    print join("\t", $num_trans, $num_chims, $pct_chims) . "\n";
    

    exit(0);



}