File: outfmt6_add_percent_match_length.pl

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (68 lines) | stat: -rwxr-xr-x 1,826 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env perl

use strict;
use warnings;
use lib ("/usr/lib/trinityrnaseq/PerlLib");
use Fasta_reader;
use List::Util qw(min max);

my $usage = "\n\n\tusage: $0 blast.outfmt6 query_fasta target_fasta\n";

my $blast_file = $ARGV[0] or die $usage;
my $query_fasta = $ARGV[1] or die $usage;
my $target_fasta = $ARGV[2] or die $usage;

main: {

    my %query_seq_lens = &get_seq_lengths($query_fasta);

    my %target_seq_lens;
    if ($query_fasta eq $target_fasta) {
        %target_seq_lens = %query_seq_lens;
    }
    else {
        %target_seq_lens = &get_seq_lengths($target_fasta);
    }

    open (my $fh, $blast_file) or die "Error, cannot open file $blast_file";
    while (<$fh>) {
        chomp;
        my @x = split(/\t/);
        my $query_acc = $x[0];
        my $target_acc = $x[1];

        my $query_len = $query_seq_lens{$query_acc} or die "Error, cannot find seq length for query: $query_acc";
        my $target_len = $target_seq_lens{$target_acc} or die "Error, cannot find seq length for target: $target_acc";

        my $query_hit_len = abs($x[7]-$x[6]);
        my $db_hit_len = abs($x[9]-$x[8]);

        my $pct_query_len = sprintf("%.2f", $query_hit_len / $query_len * 100);
        my $pct_target_len = sprintf("%.2f", $db_hit_len / $target_len * 100);

        push (@x, $query_len, $pct_query_len, $target_len, $pct_target_len, max($pct_query_len, $pct_target_len));
        
        print join("\t", @x) . "\n";
    }
    
    exit(0);
}

####
sub get_seq_lengths {
    my ($fasta_file) = @_;

    my %seq_lens;

    my $fasta_reader = new Fasta_reader($fasta_file);
    while (my $seq_obj = $fasta_reader->next()) {

        my $acc = $seq_obj->get_accession();
        my $seq_len = length($seq_obj->get_sequence());

        $seq_lens{$acc} = $seq_len;
    }

    return(%seq_lens);
}