1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
#!/usr/bin/env perl
use strict;
use warnings;
# Report, for each process id found in the current directory (plus one pooled
# "mpi" group), how many accessions each "<pslx file>.FL_selected" file lists
# and how many of those accessions appear in more than one file.
#
# Inputs are discovered by globbing the current working directory:
#   tmp.iworm.fa.pid_*.thread_*pslx   - grouped by the digits after "pid_"
#   *node_*contigs*pslx               - all pooled under the key "mpi"
# For every matched file the companion "<file>.FL_selected" is read; the
# script dies if it cannot be opened.  Column 14 (index 13) of each
# tab-separated line is used as the accession
# (NOTE(review): presumably the pslx target name - confirm against producer).
#
# Output goes to STDOUT.  Groups and files are emitted in sorted order so
# repeated runs over the same inputs give identical reports.

# Group the per-thread files by the pid embedded in the file name.
my %pid_to_files;
foreach my $file (glob("tmp.iworm.fa.pid_*.thread_*pslx")) {
    $file =~ /pid_(\d+)/ or die "error, cannnot get pid from filename $file";
    my $pid = $1;
    push @{ $pid_to_files{$pid} }, $file;
}

## handle the mpi cases: every node-level file lands in the single "mpi" group.
foreach my $file (glob("*node_*contigs*pslx")) {
    push @{ $pid_to_files{"mpi"} }, $file;
}

# Keys are digit strings or "mpi", so a plain string sort is used (a numeric
# sort would warn on "mpi").
foreach my $pid (sort keys %pid_to_files) {
    my @files       = @{ $pid_to_files{$pid} };
    my $num_threads = scalar(@files);
    print "$pid\t$num_threads\n";

    my %FL_acc_to_file;    # accession -> { FL_selected file => 1 }
    my %FL_file_to_acc;    # FL_selected file -> { accession => line count }

    foreach my $file (@files) {
        my $fl_file         = "$file.FL_selected";
        my $FL_file_counter = 0;

        # Three-arg open: the file name can never be interpreted as a mode.
        open(my $fh, '<', $fl_file)
            or die "Error, cannot open file $fl_file: $!";
        while (my $line = <$fh>) {
            chomp $line;
            my @fields = split(/\t/, $line);
            my $acc    = $fields[13];

            # Blank or truncated lines carry no accession; counting them
            # would add a bogus empty accession to every tally below.
            next unless defined $acc;

            $FL_acc_to_file{$acc}->{$fl_file} = 1;
            $FL_file_to_acc{$fl_file}->{$acc}++;
            $FL_file_counter++;
        }
        close $fh;

        print "Num threads: $num_threads\t$fl_file\t$FL_file_counter\n";
    }

    my $number_FL = scalar(keys %FL_acc_to_file);
    print "Num threads: $num_threads\tTotal:\n$number_FL\n";

    # Header kept for output compatibility; the per-file totals it names are
    # the "Num threads: ..." lines already printed above.
    print "\n#" . join("\t", "file", "total_FL") . "\n";

    ## generate distribution of FL counts.
    my %count_counter;     # number of files -> number of accessions seen in that many files
    my %acc_counter;       # accession -> number of files containing it
    foreach my $acc (keys %FL_acc_to_file) {
        my $num_files = scalar(keys %{ $FL_acc_to_file{$acc} });
        $count_counter{$num_files}++;
        $acc_counter{$acc} = $num_files;
    }

    # Per file: accessions unique to that file vs. those shared with another.
    # Files that yielded no accessions never enter %FL_file_to_acc and so get
    # no row here.
    print "\n#" . join("\t", "file", "non-redundant", "redundant") . "\n";
    foreach my $fl_file (sort keys %FL_file_to_acc) {
        my @accs           = keys %{ $FL_file_to_acc{$fl_file} };
        my $num_total_accs = scalar(@accs);
        my $num_redundant  = grep { $acc_counter{$_} > 1 } @accs;
        print join("\t", $fl_file, $num_total_accs - $num_redundant, $num_redundant) . "\n";
    }

    print "\nHistogram of reconstructions across threads.\n";
    foreach my $count (sort { $a <=> $b } keys %count_counter) {
        print "$count\t$count_counter{$count}\n";
    }

    print "\n"; # spacer
}

exit(0);
|