1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
#!/usr/bin/env perl
#
# Split a FASTQ file into well-formed and malformed records.
#
# usage: <script> inputFile out.cleanReads out.malformedReads
#
# A record starts at a line beginning with '@' and spans that line plus the
# next three lines (sequence, '+' separator line, quality string). A record
# is "clean" when all four lines are present, the sequence is non-empty, the
# third line starts with '+', and the sequence and quality strings have the
# same length. Clean records are written to out.cleanReads; every other
# record is copied verbatim to out.malformedReads. Lines found outside a
# record (not starting with '@') are skipped and written nowhere.
#
# NOTE: a record is always taken as the header plus the following three
# lines, so a record with a missing line swallows the start of the next one.

use strict;
use warnings;

my $usage = "usage: $0 inputFile out.cleanReads out.malformedReads\n\n";

my $inputFile      = $ARGV[0] or die $usage;
my $cleanReads     = $ARGV[1] or die $usage;
my $malformedReads = $ARGV[2] or die $usage;

# Three-argument open: the file names come from the command line and must
# never be interpreted as open modes or pipes.
open(my $ofh_clean, '>', $cleanReads)
    or die "Error, can't write to $cleanReads: $!";
open(my $ofh_malformed, '>', $malformedReads)
    or die "Error, cannot write to $malformedReads: $!";
open(my $fh, '<', $inputFile)
    or die "Error, cannot open $inputFile: $!";

my $counter   = 0;    # records seen (headers encountered)
my $num_clean = 0;    # records written to the clean file
my $num_dirty = 0;    # records written to the malformed file

my $line = <$fh>;

# Test definedness, not truth: a final line of "0" without a newline is
# false but is still input.
while (defined $line) {

    if ($line !~ /^\@/) {
        # Not a record header: skip it and look for the next one.
        $line = <$fh>;
        next;
    }

    $counter++;
    print STDERR "\r[$counter] [$num_clean clean] [$num_dirty dirty] "
        if ($counter % 10000 == 0);

    # Collect the header and up to three following lines. The follow-up
    # lines are taken blindly because a quality string may itself start
    # with '@'. Stop early at end of file so no undef is stored.
    my @rec = ($line);
    $line = <$fh>;
    while (@rec < 4 && defined $line) {
        push(@rec, $line);
        $line = <$fh>;
    }
    # $line now holds the first line after this record (undef at EOF).

    my $record_text = join("", @rec);

    chomp(my @fields = @rec);
    my ($header, $seq, $qual_header, $qual_line) = @fields;

    # Use lengths rather than truthiness: a one-base read whose quality
    # string is "0" is valid but evaluates as false.
    my $is_clean = @fields == 4
        && length($seq) > 0
        && $qual_header =~ /^\+/
        && length($seq) == length($qual_line);

    if ($is_clean) {
        # can do some more checks here if needed to be sure that the lines are formatted as expected.
        print $ofh_clean join("\n", $header, $seq, $qual_header, $qual_line) . "\n";
        $num_clean++;
    }
    else {
        print $ofh_malformed $record_text;
        $num_dirty++;
    }
}

close($fh);

# Buffered write errors (e.g. a full disk) only surface at close time.
close($ofh_clean)
    or die "Error, failed writing to $cleanReads: $!";
close($ofh_malformed)
    or die "Error, failed writing to $malformedReads: $!";

# Final tally; also terminates the "\r" progress line.
print STDERR "\r[$counter] [$num_clean clean] [$num_dirty dirty] \n";

exit(0);
|