1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
|
package main;
our $SEE = 0;
package CDNA::Overlap_assembler;
use strict;
sub new {
my $packagename = shift;
my $self = {
node_list => []
};
bless ($self, $packagename);
return ($self);
}
sub add_cDNA {
my $self = shift;
my ($cdna_acc, $end5, $end3) = @_;
my ($cdna_start, $cdna_stop) = sort {$a<=>$b} ($end5, $end3);
my $node = Cdna_node->new($cdna_acc, $cdna_start, $cdna_stop);
push (@{$self->{node_list}}, $node);
}
####
sub build_clusters {
my $self = shift;
my $node_list_aref = $self->{node_list};
@{$node_list_aref} = sort {$a->{lend}<=>$b->{lend}} @{$node_list_aref}; #sort by lend coord.
## set indices
for (my $i = 0; $i <= $#{$node_list_aref}; $i++) {
$node_list_aref->[$i]->{myIndex} = $i;
}
my @clusters;
my $first_node = $node_list_aref->[0];
my $start_pos = 0;
my ($exp_left, $exp_right) = ($first_node->{lend}, $first_node->{rend});
print $first_node->{acc} . " ($exp_left, $exp_right)\n" if $SEE;
for (my $i = 1; $i <= $#{$node_list_aref}; $i++) {
my $curr_node = $node_list_aref->[$i];
my ($lend, $rend) = ($curr_node->{lend}, $curr_node->{rend});
print $curr_node->{acc} . " ($lend, $rend)\n" if $SEE;
if ($exp_left <= $rend && $exp_right >= $lend) { #overlap
$exp_left = &min($exp_left, $lend);
$exp_right = &max($exp_right, $rend);
print "overlap. New expanded coords: ($exp_left, $exp_right)\n" if $SEE;
} else {
print "No overlap; Creating cluster: " if $SEE;
my @cluster;
for (my $j=$start_pos; $j < $i; $j++) {
my $acc = $node_list_aref->[$j]->{acc};
push (@cluster, $acc);
print "$acc, " if $SEE;
}
push (@clusters, [@cluster]);
$start_pos = $i;
($exp_left, $exp_right) = ($lend, $rend);
print "\nResetting expanded coords: ($lend, $rend)\n" if $SEE;
}
}
print "# Adding final cluster.\n" if $SEE;
if ($start_pos != $#{$node_list_aref}) {
print "final cluster: " if $SEE;
my @cluster;
for (my $j = $start_pos; $j <= $#{$node_list_aref}; $j++) {
my $acc = $node_list_aref->[$j]->{acc};
print "$acc, " if $SEE;
push (@cluster, $acc);
}
push (@clusters, [@cluster]);
print "\n" if $SEE;
} else {
my $acc = $node_list_aref->[$start_pos]->{acc};
push (@clusters, [$acc]);
print "adding final $acc.\n" if $SEE;
}
return (@clusters);
}
sub min {
my (@x) = @_;
@x = sort {$a<=>$b} @x;
my $min = shift @x;
return ($min);
}
sub max {
my @x = @_;
@x = sort {$a<=>$b} @x;
my $max = pop @x;
return ($max);
}
#################################################################
package Cdna_node;
use strict;
sub new {
my $packagename = shift;
my ($acc, $lend, $rend) = @_;
my $self = { acc=>$acc,
lend=>$lend,
rend=>$rend,
myIndex=>undef(),
overlapping_indices=>[]
};
bless ($self, $packagename);
return ($self);
}
1; #EOM
|