1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
|
#!/usr/bin/perl
#
# Flatten a cluster listing into one tab-separated row per cluster member.
#
# Input (files named on the command line, or STDIN):
#   * A line starting with ">" opens a new cluster; ">Cluster <N>" supplies
#     the cluster number that is reported for the members that follow.
#   * Every other line is a member record of the form
#         <n>\t<length><two lowercase letters>, ><id>... *
#     for the representative (reported with identity 100), or
#         <n>\t<length><two lowercase letters>, ><id>... <text ending in NN% or NN.N%>
#     for any other member.
#   NOTE(review): this layout matches CD-HIT ".clstr" files -- confirm against
#   the tool that actually produces the input.
#
# Output: a header row followed by one row per member, printed by
# process_this_cluster() each time a cluster is complete.
use strict;
use warnings;

my $no = 0;               # number of members collected for the current cluster
my $clstr_no = "";        # cluster number taken from the last ">Cluster N" line
my @this_cluster = ();    # member records: [ id, length, is_representative, identity ]

# The header uses the same tab delimiter as the data rows so the table can be
# split on a single separator.
print "id\tclstr\tclstr_size\tlength\tclstr_rep\tclstr_iden\tclstr_cov\n";

while (my $line = <>) {
    # Strip LF or CRLF so the end-of-line anchored identity pattern also works
    # on files with Windows line endings.
    $line =~ s/\r?\n\z//;

    if ($line =~ /^>/) {
        # A new header closes the previous cluster, if it had any members.
        process_this_cluster() if $no > 0;
        if ($line =~ /^>Cluster (\d+)/) {
            $clstr_no = $1;
        }
        $no = 0;
        @this_cluster = ();
        next;
    }

    # Blank lines carry no member; do not count them towards the cluster size.
    next if $line !~ /\S/;

    my ($id, $len, $rep, $iden);
    if ($line =~ /\d+\t(\d+)[a-z]{2}, >(.+)\.\.\. \*/) {
        # Representative member, marked by the trailing "*".
        ($len, $id, $rep, $iden) = ($1, $2, 1, 100);
    }
    elsif ($line =~ /\d+\t(\d+)[a-z]{2}, >(.+)\.\.\./) {
        ($len, $id, $rep) = ($1, $2, 0);
        # A failed match leaves $1 holding the length captured above, so the
        # identity must only be read after a successful match.
        if ($line =~ /(\d+%|\d+\.\d+%)$/) {
            $iden = $1;
        }
        else {
            print STDERR "No identity percentage on line $.: $line\n";
            $iden = "";
        }
    }
    else {
        # Unparseable record: report it and leave it out of the cluster rather
        # than emitting an empty row and inflating the cluster size.
        print STDERR "Skipping unparseable line $.: $line\n";
        next;
    }

    push(@this_cluster, [ $id, $len, $rep, $iden ]);
    $no++;
}

# Flush the final cluster, which has no following header line to trigger it.
if ($no > 0) {
    process_this_cluster();
}
# Print one tab-separated row for each member of the current cluster.
#
# Reads the file-level variables @this_cluster (records of
# [ id, length, is_representative, identity ]), $clstr_no and $no; takes no
# arguments and returns nothing.
#
# Row columns: id, cluster number, cluster size ($no), length,
# representative flag, identity, coverage. Coverage is the member's length as
# a truncated integer percentage of the representative's length.
sub process_this_cluster {
    # Representative first, then longest to shortest.
    my @t = sort { ($b->[2] <=> $a->[2]) or ($b->[1] <=> $a->[1]) } @this_cluster;
    return unless @t;

    # Reference length for coverage: the length of the member flagged as
    # representative (the last one in sorted order if several are flagged).
    my $longest = 0;
    foreach my $member (@t) {
        $longest = $member->[1] if ($member->[2]);
    }

    if (!$longest) {
        # No usable representative length: dividing by it would abort the run
        # with "Illegal division by zero". With no representative the sort
        # above puts the longest member first, so use that as the reference.
        warn "Cluster $clstr_no has no usable representative length; "
           . "coverage is computed against the longest member\n";
        $longest = $t[0][1] || 0;
    }

    foreach my $member (@t) {
        # If every length is zero there is nothing to compare against.
        my $cov = $longest ? int($member->[1] / $longest * 100) : 0;
        print "$member->[0]\t$clstr_no\t$no\t$member->[1]\t$member->[2]\t$member->[3]\t$cov\%\n";
    }
    return;
}
|