1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
#!/usr/bin/env perl
use strict;
use warnings;
use FindBin;
use lib ("$FindBin::RealBin/../../../PerlLib");
use Nuc_translator;
my $usage = "usage: $0 blat.psl.nameSorted.sam reads.tab.nameSorted\n\n";
my $sam = $ARGV[0] or die $usage;
my $seqs = $ARGV[1] or die $usage;
main: {
open (my $sam_fh, "$sam") or die "Error, cannot open file $sam";
open (my $seqs_fh, "$seqs") or die "Error, cannot open file $seqs";
my $seq_line = <$seqs_fh>;
chomp $seq_line;
my ($seq_acc, $seq, $qual) = split(/\t/, $seq_line);
$seq_acc =~ s/\s//g; # rid any ws from acc name
unless ($qual) {
$qual = 'B' x length($seq);
}
while (my $sam_line = <$sam_fh>) {
my @x = split(/\t/, $sam_line);
my $acc = $x[0];
my $flag = $x[1];
my $aligned_orient = ($flag & 0x0010) ? '-' : '+';
while ($seq_acc lt $acc) {
$seq_line = <$seqs_fh>;
chomp $seq_line;
($seq_acc, $seq, $qual) = split(/\t/, $seq_line);
$seq_acc =~ s/\s//g; # no ws in acc name
unless (defined $qual) {
$qual = 'B' x length($seq); #$qual = "*";
}
}
if ($acc eq $seq_acc) {
if ($aligned_orient eq '-') {
my $revseq = &reverse_complement($seq);
my @q = split(//, $qual);
my $revqual = join("", reverse(@q));
$x[9] = $revseq;
$x[10] = $revqual;
}
else {
$x[9] = $seq;
$x[10] = $qual;
}
}
else {
die "Error,\n[$acc]\nnot encountered in file: $seqs,\ncurrently cursor is at seq:\n[$seq_acc]\n";
}
## set read quality score to a high value so that Scripture will use it.
$x[4] = 255;
$x[5] =~ s/H/S/gi; # convert hard to soft clips; samtools doesn't like H's
foreach my $val (@x) {
unless (defined $val) {
$val = "*";
}
}
print join("\t", @x);
}
exit(0);
}
|