File: add_annot_to_trans_id.pl

package info (click to toggle)
trinityrnaseq 2.15.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 468,004 kB
  • sloc: perl: 49,905; cpp: 17,993; java: 12,489; python: 3,282; sh: 1,989; ansic: 985; makefile: 717; xml: 62
file content (82 lines) | stat: -rwxr-xr-x 1,717 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env perl

use strict;
use warnings;

my $usage = "usage: $0 annot.gff3 target_file.txt\n\n";

my $annot_gff3 = $ARGV[0] or die $usage;
my $target_file = $ARGV[1] or die $usage;


main: {

    my %trans_id_to_annot = &parse_trans_names($annot_gff3);

    open (my $fh, $target_file) or die "Error, cannot open file $target_file";
    while (<$fh>) {
        chomp;
        my @x = split(/\t/);
        
        if (my $name = $trans_id_to_annot{ $x[0] }) {
            $x[0] .= " $name";
            $x[0] =~ s/\s/_/g;
        }

        print join("\t", @x) . "\n";
    }
    close $fh;



    exit(0);
    
}


####
sub parse_trans_names {
    my ($gff3_file) = @_;


    my %gene_to_name;
    my %mRNA_to_gene;

    open (my $fh, $gff3_file) or die "Error, cannot open file $gff3_file";
    while (<$fh>) {
        unless (/\w/) { next; }
        
        my @x = split(/\t/);
        my $feat_type = $x[2];
        my $info = $x[8];
        
        if ($info =~ /ID=([^;]+)/) {
            my $id = $1;

            if ($feat_type eq "mRNA") {
                $info =~ /Parent=([^;]+)/ or die "Error, cannot get parent info from $_";
                my $gene = $1;
                $mRNA_to_gene{$id} = $gene;
            }
            elsif ($feat_type eq "gene") {
                $info =~ /Name=(.*)/ or die "Error, cannot get name from $_";
                my $name = $1;
                $gene_to_name{$id} = $name;
            }
        }
    }
    close $fh;

    my %trans_to_name;
    foreach my $mRNA (keys %mRNA_to_gene) {
        my $gene = $mRNA_to_gene{$mRNA};
        my $name = $gene_to_name{$gene};
        $trans_to_name{$mRNA} = $name;
    }


    return(%trans_to_name);
}