File: analyze_blastPlus_topHit_coverage.extract_OS.pl

package info (click to toggle)
trinityrnaseq 2.2.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 212,452 kB
  • ctags: 5,067
  • sloc: perl: 45,552; cpp: 19,678; java: 11,865; sh: 1,485; makefile: 613; ansic: 427; python: 313; xml: 83
file content (102 lines) | stat: -rwxr-xr-x 2,158 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env perl

use strict;
use warnings;

use Getopt::Long qw(:config no_ignore_case bundling pass_through);

my $usage = <<__EOUSAGE__;

#########################################################################################
#
# Required:
#
#  --blast_outfmt6_w_pct_hit_length <string>     blast.outfmt6.w_pct_hit_length  
#                                                (results from running analyze_blastPlus_tophat_coverage.pl)
#  
#  Optional:
#
#  --min_pct_hit_length <int>                    minimum percent hit length to be included in analysis. (default: 20)
#
#  --min_pct_species_report <float>              minimum percent of total species content to 
#                                                be reported in output (default: 1.0)
#
###########################################################################################


__EOUSAGE__

    ;

my $file;
my $min_pct_hit_len = 200;
my $min_pct_species_report = 1.0;

my $help_flag;

&GetOptions( 'blast_outfmt6_w_pct_hit_length=s' => \$file,
             'min_pct_hit_length=i' => \$min_pct_hit_len,
             'min_pct_species_report=f' => \$min_pct_species_report,
             
             'help|h' => \$help_flag,
             );


if ($help_flag) {
    die $usage;
}

if (@ARGV) {
    die "Error, didn't parse parameters: @ARGV";
}

unless ($file) {
    die $usage;
}


my $total = 0;
my %OS_counter;

my $prev_acc = "";

open (my $fh, $file) or die $!;
while (<$fh>) {
    if (/^\#/) { next; }
    
    chomp;
    my @x = split(/\t/);

    my $acc = $x[0];
    if ($acc eq $prev_acc) { next; }
                         

    my $pct_hit_len = $x[13];
    if ($pct_hit_len < $min_pct_hit_len) {
        next;
    }

    my $annot = $x[14];
    
    if ($annot =~ /OS=(\S+)/) {
        my $os = $1;
        $total++;
        $OS_counter{$os}++;
        
        $prev_acc = $acc;

    }
}
close $fh;

foreach my $os (sort {$OS_counter{$b} <=> $OS_counter{$a}} keys %OS_counter) {
    
    my $count = $OS_counter{$os};
    my $pct = sprintf("%.2f", $count/$total*100);

    print join("\t", $os, $count, "$pct%") . "\n" if $pct >= $min_pct_species_report;
}

exit(0);