1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
#!/usr/bin/env perl
use strict;
use warnings;
use Cwd;
use File::Basename;
use Carp;
use Data::Dumper;
use Getopt::Long qw(:config no_ignore_case bundling);
$ENV{LC_ALL} = 'C'; # critical for proper sorting using [system "sort -k1,1 ..."] within the perl script
my $usage = <<_EOUSAGE_;
################################################################################################################
#
# --left and --right <string> (if paired reads)
# or
# --single <string> (if unpaired reads)
#
# Required inputs:
#
# --target <string> multi-fasta file containing the target sequences (should be named {refName}.fa )
#
# --out_prefix|o output prefix (default: bwa)
#
#
# ## General options
#
# Any options after '--' are passed onward to the alignments programs (except BLAT -which has certain options exposed above).
#
# To set the number of processors used by BWA use:
# -- -t 16
# You could also set other options such as 'mismatch penalty' (via -- -M INT), etc.
#
####################################################################################################################
_EOUSAGE_
;
my $help_flag;
my $target_db;
my $left_file;
my $right_file;
my $single_file;
my $output_prefix = "bwa";
unless (@ARGV) {
die $usage;
}
&GetOptions ( 'h' => \$help_flag,
## required inputs
'left=s' => \$left_file,
'right=s' => \$right_file,
'single=s' => \$single_file,
"target=s" => \$target_db,
'output_prefix|o=s' => \$output_prefix,
);
if ($help_flag) { die $usage; }
unless ($target_db && -s $target_db) {
die $usage . "Must specify target_db and it must exist at that location";
}
unless ( ($single_file && -e $single_file)
||
($left_file && -e $left_file
&& $right_file && -e $right_file)) {
die $usage . "sorry, cannot find $left_file and $right_file";
}
####
sub process_cmd {
my ($cmd) = @_;
print STDERR "CMD: $cmd\n";
my $ret = system($cmd);
if ($ret) {
confess "Error, cmd: $cmd died with ret $ret";
}
return;
}
main: {
my $cmd = "bwa index $target_db";
&process_cmd($cmd) unless (-e "$target_db.bwt");;
$cmd = "samtools faidx $target_db";
&process_cmd($cmd) unless (-e "$target_db.fai");
if ($left_file && $right_file) {
$cmd = "bwa aln @ARGV $target_db $left_file > $left_file.sai";
&process_cmd($cmd);
$cmd = "bwa aln @ARGV $target_db $right_file > $right_file.sai";
&process_cmd($cmd);
$cmd = "bwa sampe $target_db $left_file.sai $right_file.sai $left_file $right_file | samtools view -bS -F 4 - | samtools sort -o $output_prefix.bam";
&process_cmd($cmd);
}
else {
$cmd = "bwa aln @ARGV $target_db $single_file > $single_file.sai";
&process_cmd($cmd);
$cmd = "bwa samse $target_db $single_file.sai $single_file | samtools view -bS -F 4 - | samtools sort -o $output_prefix.bam";
&process_cmd($cmd);
}
exit(0);
}
|