# Reads alignments from a SAM file and prints one 12-column, tab-separated
# BED-style line per mapped alignment to STDOUT.
use strict;
use warnings;
use lib ("/usr/lib/trinityrnaseq/PerlLib");
use SAM_reader;
use SAM_entry;
# Usage text shown when no SAM file is supplied on the command line.
my $usage = "usage: $0 file.sam\n\n";

# The first positional argument is the SAM file to convert; abort with the
# usage text when it is missing (or otherwise evaluates to false).
my $sam_file = $ARGV[0];
die $usage unless $sam_file;
# Main driver: walk every entry of the SAM file and print one 12-column,
# tab-separated BED-style line per mapped alignment to STDOUT.
#
# Columns written, in order:
#   scaffold name, span start (zero-based), span end (exclusive), read name,
#   score (always 0), strand, thick start, thick end (both equal to the span),
#   item colour placeholder ".", number of blocks, comma-separated block
#   lengths, comma-separated block starts relative to the span start.
#
# Entries flagged as unmapped are skipped, as are entries that report no
# aligned genome segments (for which no span can be computed).
main: {
    # Direct method call rather than the indirect-object form
    # `new SAM_reader(...)`, which Perl can mis-parse.
    my $sam_reader = SAM_reader->new($sam_file);

    while ($sam_reader->has_next()) {
        my $sam_entry = $sam_reader->get_next();

        next if $sam_entry->is_query_unmapped();

        my $read_name  = $sam_entry->get_read_name();
        my $scaff_name = $sam_entry->get_scaffold_name();
        my $strand     = $sam_entry->get_query_strand();

        # Only the genome-side segments are needed here; the query-side
        # coordinates returned alongside them are ignored.
        my ($genome_coords_aref) = $sam_entry->get_alignment_coords();

        # Without at least one segment there is no span to report; emitting a
        # line anyway would produce undefined start/end values.
        unless ($genome_coords_aref && @$genome_coords_aref) {
            warn "warning: no aligned segments for read $read_name; skipping\n";
            next;
        }

        # The overall span runs from the smallest to the largest coordinate
        # seen across all segments.
        my @coords = sort { $a <=> $b } map { @$_ } @$genome_coords_aref;
        my $span_lend = $coords[0];
        my $span_rend = $coords[-1];

        my @lengths;
        my @starts;
        foreach my $segment (@$genome_coords_aref) {
            my ($lend, $rend) = @$segment;

            # The +1 treats segment coordinates as inclusive on both ends.
            push(@lengths, $rend - $lend + 1);

            # Block starts are offsets from the (not yet decremented) span
            # start, so the leftmost block gets offset 0.
            push(@starts, $lend - $span_lend);
        }
        my $num_segments = scalar @lengths;

        $span_lend--; # coordinate is zero-based, and rend is exclusive

        print join("\t",
                   $scaff_name,
                   $span_lend,
                   $span_rend,
                   $read_name,
                   0,
                   $strand,
                   $span_lend,
                   $span_rend,
                   ".",
                   $num_segments,
                   join(",", @lengths),
                   join(",", @starts),
            ) . "\n";
    }

    exit(0);
}
# End of script.