1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
#!/usr/bin/env perl
use strict;
use warnings;
my $usage = "\n\tusage: $0 cd-hit.clstr\n\n" .
"try running cd-hit first like so:\n" .
"\tcd-hit-est -o cdhit -c 0.98 -i Trinity.fasta -p 1 -d 0 -b 3 -T 10\n\n";
my $cdhit_file = $ARGV[0] or die $usage;
main: {
my $num_bad_clusters = 0;
my $cluster;
my @trans;
open(my $fh, $cdhit_file) or die $!;
while (<$fh>) {
chomp;
if (/^>/) {
if (@trans) {
$num_bad_clusters += &examine_cluster($cluster, \@trans);
}
$cluster = $_;
@trans = ();
}
else {
push (@trans, $_);
}
}
close $fh;
if (@trans) {
$num_bad_clusters += &examine_cluster($cluster, \@trans);
}
print "Num bad clusters: $num_bad_clusters\n";
exit($num_bad_clusters);
}
####
sub examine_cluster {
my ($cluster, $trans_aref) = @_;
my @trans = @$trans_aref;
my %cluster_ids;
foreach my $tran (@trans) {
$tran =~ /TRINITY_(DN\d+)_/;
$cluster_ids{$1}++;
}
my $num_clusters = scalar (keys %cluster_ids);
if ($num_clusters != 1) {
print STDERR "ERROR, got multiple clusters represented:\n"
. "$cluster\n" . join("\n", @trans) . "\n\n";
return(1);
}
else {
return(0);
}
}
|