1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
|
#!/usr/bin/env perl
use strict;
use warnings;
use FindBin;
use lib ("$FindBin::RealBin/../../PerlLib");
use SAM_reader;
use SAM_entry;
use Overlap_piler;
my $usage = "usage: $0 file.sam\n\n";
my $sam_file = $ARGV[0] or die $usage;
main: {
my $sam_reader = new SAM_reader($sam_file);
my %core_to_coords;
while ($sam_reader->has_next()) {
my $sam_entry = $sam_reader->get_next();
if ($sam_entry->is_query_unmapped()) {
next;
}
my $read_name = $sam_entry->get_read_name();
my $scaff_name = $sam_entry->get_scaffold_name();
my $strand = $sam_entry->get_query_strand();
my ($genome_coords_aref, $query_coords_aref) = $sam_entry->get_alignment_coords();
my $core_scaff = join("$;", $sam_entry->get_core_read_name(), $scaff_name);
my @coords;
foreach my $segment (@$genome_coords_aref) {
my ($lend, $rend) = @$segment;
push (@{$core_to_coords{$core_scaff}}, [$lend, $rend]);
}
}
foreach my $core_scaff (keys %core_to_coords) {
my @coords = @{$core_to_coords{$core_scaff}};
my ($read_core_name, $scaff_name) = split(/$;/, $core_scaff);
@coords = sort {$a<=>$b} @coords;
@coords = &Overlap_piler::simple_coordsets_collapser(@coords);
my $span_lend = $coords[0]->[0];
my $span_rend = $coords[$#coords]->[1];
my @lengths;
my @starts;
my $num_segments = 0;
foreach my $segment (@coords) {
my ($lend, $rend) = @$segment;
my $length = $rend - $lend + 1;
push (@lengths, $length);
push (@starts, $lend - $span_lend);
$num_segments++;
}
$span_lend--; # coordinate is zero-based, and rend is exclusive
print join("\t",
$scaff_name,
$span_lend,
$span_rend,
$read_core_name,
0,
"+",
$span_lend,
$span_rend,
".",
$num_segments,
join(",", @lengths),
join(",", @starts),
) . "\n";
}
exit(0);
}
|