1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
#!/usr/bin/env perl
use strict;
use warnings;
use POSIX qw (ceil);
use FindBin;
use lib ("$FindBin::RealBin/../../PerlLib");
use Fasta_reader;
my $usage = "usage: $0 Trinity.fasta [length_bin_size=100] [out_prefix='dist']\n\n";
my $trinity_fasta = $ARGV[0] or die $usage;
my $length_bin_size = $ARGV[1] || 100;
my $out_prefix = $ARGV[2] || "dist";
main: {
my %comp_counter;
my %length_bin_counter;
my $fasta_reader = new Fasta_reader($trinity_fasta);
while (my $seq_obj = $fasta_reader->next()) {
my $acc = $seq_obj->get_accession();
my $sequence = $seq_obj->get_sequence();
my $sequence_length = length($sequence);
my $bin = ceil($sequence_length/$length_bin_size);
$length_bin_counter{$bin}++;
my $comp_name;
if ($acc =~ /^(.*comp\d+_c\d+)/) {
$comp_name = $1;
}
elsif ($acc =~ /^(.*c\d+_g\d+)/) {
$comp_name = $1;
}
else {
die "Error, cannot get component/gene id from acc: $acc";
}
$comp_counter{$comp_name}++;
}
## get component dist
my %comp_size_counter;
foreach my $count (values %comp_counter) {
$comp_size_counter{$count}++;
}
## write component dist
open (my $ofh, ">$out_prefix.comp_sizes.txt") or die $!;
print $ofh join("\t", "transPerComp", "num_components") . "\n";
foreach my $comp_size (sort {$a<=>$b} keys %comp_size_counter) {
my $comp_size_count = $comp_size_counter{$comp_size};
print $ofh join("\t", $comp_size, $comp_size_count) . "\n";
}
close $ofh;
## write length distribution
open ($ofh, ">$out_prefix.trans_lengths.txt") or die $!;
print $ofh join("\t", "#length_bin", "count_trans") . "\n";
foreach my $length_bin (sort {$a<=>$b} keys %length_bin_counter) {
my $length = $length_bin * $length_bin_size;
my $count = $length_bin_counter{$length_bin};
print $ofh join("\t", $length, $count) . "\n";
}
close $ofh;
print STDERR "\n\nDone. See files $out_prefix.comp_sizes.txt and $out_prefix.trans_lengths.txt\n\n";
exit(0);
}
|