File: analyze_blastPlus_topHit_coverage.annotate_details_w_FL_info.pl

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (128 lines) | stat: -rwxr-xr-x 2,676 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env perl

use strict;
use warnings;

use Getopt::Long qw(:config no_ignore_case bundling pass_through);


my $help_flag;

my $usage = <<__EOUSAGE__;

#################################################################################################
#
# Required:
#
#  --details <string>              compreh_init_build.details filename
#  --blast <string>                blast.outfmt6.w_pct_hit_length filename
#
# Optional:
#
#  --min_pct_hit_len <float>     min length of alignment coverage of hit (default: 80)
#  --pasa_validations <string>   incorporate notes from pasa's alignment validations file (alignment.validations.out)
#
#################################################################################################


__EOUSAGE__

    ;


my $compreh_details_file;
my $blast_file;
my $min_pct_hit_len = 80;
my $pasa_validations_file;

&GetOptions ( 'h' => \$help_flag,
              'details=s' => \$compreh_details_file,
              'blast=s' => \$blast_file,
              'min_pct_hit_len=f' => \$min_pct_hit_len,
              'pasa_validations=s' => \$pasa_validations_file,
              
              );


if ($help_flag) {
    die $usage;
}

unless ($compreh_details_file && $blast_file) {
    die $usage;
}

my %pasa_validations_info;
if ($pasa_validations_file) {
    
    open (my $fh, $pasa_validations_file) or die $!;
    while (<$fh>) {
        chomp;
        my @x = split(/\t/);
        my $acc = $x[1];
        my $note = $x[14];
        if ($note) {
            $pasa_validations_info{$acc} .= $note . "; ";
        }
    }
    close $fh;

}

my %FL_mappings;

{
    open (my $fh, $blast_file) or die $!;
    while (<$fh>) {
        if (/^\#/) { next; }
        
        chomp;
        my @x = split(/\t/);
        my $pct_hit_len = $x[13];
        if ($pct_hit_len < $min_pct_hit_len) {
            next;
        }
        
        my $query = $x[0];
        my $hit = $x[1];
        my $annot = $x[14];
        my $Evalue = $x[10];

        $FL_mappings{$query} = join("\t", $hit, $annot, $Evalue, $pct_hit_len);
        
    }
    close $fh;
}

my $prev_gene = "";
open (my $fh, $compreh_details_file) or die $!;
while (<$fh>) {
    chomp;
    my @x = split(/\t/);
    my $gene = $x[0];
    my $acc = $x[1];
    if (my $mappings = $FL_mappings{$acc}) {
        $mappings =~ s/\t/ /g;
        push (@x, $mappings);
    }
    else {
        push (@x, "");
    }
    if (my $validation_note = $pasa_validations_info{$acc}) {
        push (@x, "validation_note: $validation_note");
    }
    

    if ($gene ne $prev_gene) {
        print "\n";
    }
    $prev_gene = $gene;
    
    print join("\t", @x) . "\n";
}



exit(0);