1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
|
#!/usr/bin/env perl
use strict;
use warnings;
use Carp;
use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
use lib("/usr/lib/trinityrnaseq/PerlLib");
use Fastq_reader;
use Data::Dumper;
# Command-line handling.
#
# Required options:
#   --samples_file   <string>  tab-delimited samples file (see parse_samples_file)
#   --output_prefix  <string>  prefix for the two merged output files
# Optional:
#   -h / --help                show the usage text and stop
#
# The usage text is emitted via die(), so it goes to STDERR and the script
# exits with a non-zero status.

my $help_flag;

my $usage = <<__EOUSAGE__;
#############################################################################################
#
# --samples_file <string> samples.txt
#
# --output_prefix <string> outputs will be named <string>.left.fq and <string>.right.fq
#
#############################################################################################
__EOUSAGE__
;

my $samples_file;
my $output_prefix;

# Getopt::Long is configured with pass_through (see the 'use' line), so
# unrecognised options are left in @ARGV instead of raising an error.
&GetOptions ( 'h|help' => \$help_flag,
              'samples_file=s' => \$samples_file,
              'output_prefix=s' => \$output_prefix,
    );

# Honour an explicit help request even when both required options are present;
# previously the flag was parsed but never looked at.
if ($help_flag) {
    die $usage;
}

unless ($samples_file && $output_prefix) {
    die $usage;
}
# Script body: interleave the paired fastq records of all samples into
# "$output_prefix.left.fq" and "$output_prefix.right.fq".
#
# Records are taken round-robin: every pass of the outer loop pulls one
# left/right record pair from each sample that still has data, so the two
# output files stay in lock-step (record i of the left file and record i of
# the right file come from the same sample and the same pass).
#
# Dies (with a stack trace) if one mate file of a sample runs out of records
# before the other, and if an output file cannot be opened or closed cleanly.
main: {

    my @samples = &parse_samples_file($samples_file);

    # init fq readers: one reader per mate file, stored on the sample record
    foreach my $sample (@samples) {
        $sample->{left_fq_reader}  = Fastq_reader->new($sample->{left_fq});
        $sample->{right_fq_reader} = Fastq_reader->new($sample->{right_fq});
    }

    # open output files (three-argument open so the prefix is never parsed as a mode)
    my $left_fq_outfile = "$output_prefix.left.fq";
    open(my $left_ofh, '>', $left_fq_outfile)
        or die "Error, cannot write to $left_fq_outfile: $!";

    my $right_fq_outfile = "$output_prefix.right.fq";
    open(my $right_ofh, '>', $right_fq_outfile)
        or die "Error, cannot write to $right_fq_outfile: $!";

    # Acts as a flag: non-zero while the previous pass emitted at least one record.
    my $samples_remaining = scalar(@samples);
    my $counter = 0;  # number of record pairs written so far

    while ($samples_remaining != 0) {

        $samples_remaining = 0;

        foreach my $sample (@samples) {

            if ($sample->{done}) { next; }

            # A false value from next() is treated as "no more records".
            my $left_fq_entry  = $sample->{left_fq_reader}->next();
            my $right_fq_entry = $sample->{right_fq_reader}->next();

            if ($left_fq_entry xor $right_fq_entry) {
                # Exactly one mate is missing: report which side actually ran out.
                my ($present, $missing) = $left_fq_entry
                    ? ("left_fq", "right_fq")
                    : ("right_fq", "left_fq");
                confess "Error, found $present entry but not $missing entry: "
                    . Dumper([$sample, $left_fq_entry, $right_fq_entry]);
            }
            elsif ($left_fq_entry && $right_fq_entry) {
                print $left_ofh $left_fq_entry->get_fastq_record();
                print $right_ofh $right_fq_entry->get_fastq_record();

                $samples_remaining = 1;

                $counter++;
                if ($counter % 100000 == 0) {
                    # progress indicator, rewritten in place on STDERR
                    print STDERR "\r[$counter] ";
                }
            }
            else {
                # Both mates exhausted together: retire this sample.
                $sample->{done} = 1;
                $sample->{left_fq_reader}->finish();
                $sample->{right_fq_reader}->finish();
            }
        }
    } # end of while samples remaining

    # Buffered write errors (e.g. a full disk) surface at close time, so check it.
    close($left_ofh)  or die "Error, failed to close $left_fq_outfile: $!";
    close($right_ofh) or die "Error, failed to close $right_fq_outfile: $!";

    print STDERR "\n\nDone. Output $counter fastq records per file.\n";

    exit(0);
}
####
# parse_samples_file($samples_file)
#
# Reads a tab-delimited samples file whose first three columns are
#     sample_name <tab> left_fq <tab> right_fq
# and returns a list of hash references, one per sample, with the keys
# 'sample_name', 'left_fq' and 'right_fq', in file order. Columns beyond the
# third are ignored.
#
# Blank lines and lines starting with '#' are skipped, and a trailing
# carriage return (CRLF files) is stripped so it does not end up in the
# right_fq path.
#
# Dies if the file cannot be opened or if a data line lacks any of the three
# required columns.
sub parse_samples_file {
    my ($samples_file) = @_;

    my @samples;

    # Three-argument open: the path is user input and must never be
    # interpreted as a mode or a pipe.
    open(my $fh, '<', $samples_file)
        or die "Error, cannot open file: $samples_file: $!";

    # Lexical loop variable so the caller's $_ is not clobbered.
    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/\r$//;

        unless ($line =~ /\S/) { next; }  # blank line
        if ($line =~ /^\#/) { next; }      # comment line

        my ($sample_name, $left_fq, $right_fq) = split(/\t/, $line);

        # Fail here with the line number rather than handing undefined file
        # names to the fastq readers later on.
        foreach my $field ($sample_name, $left_fq, $right_fq) {
            unless (defined($field) && length($field)) {
                die "Error, line $. of $samples_file does not have the expected format: "
                    . "sample_name(tab)left_fq(tab)right_fq\n";
            }
        }

        push (@samples, { sample_name => $sample_name,
                          left_fq => $left_fq,
                          right_fq => $right_fq,
              } );
    }
    close $fh;

    return(@samples);
}
|