File: Overlap_assembler.pm

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (119 lines) | stat: -rwxr-xr-x 2,927 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package main;
our $SEE = 0;


package CDNA::Overlap_assembler;

use strict;

sub new {
    my $packagename = shift;
    my $self = {
	node_list => []
	};
    bless ($self, $packagename);
    return ($self);
}

sub add_cDNA {
    my $self = shift;
    my ($cdna_acc, $end5, $end3) = @_;
    my ($cdna_start, $cdna_stop) = sort {$a<=>$b} ($end5, $end3);
    my $node = Cdna_node->new($cdna_acc, $cdna_start, $cdna_stop);
    push (@{$self->{node_list}}, $node);
}


####
sub build_clusters {
    my $self = shift;
    my $node_list_aref = $self->{node_list};
    @{$node_list_aref} = sort {$a->{lend}<=>$b->{lend}} @{$node_list_aref}; #sort by lend coord.
    ## set indices
    for (my $i = 0; $i <= $#{$node_list_aref}; $i++) {
	$node_list_aref->[$i]->{myIndex} = $i;
    }
    
    my @clusters;
    my $first_node = $node_list_aref->[0];
    my $start_pos = 0;
    my ($exp_left, $exp_right) = ($first_node->{lend}, $first_node->{rend});
    print $first_node->{acc} . " ($exp_left, $exp_right)\n" if $SEE;
    for (my $i = 1; $i <= $#{$node_list_aref}; $i++) {
	my $curr_node = $node_list_aref->[$i];
	my ($lend, $rend) = ($curr_node->{lend}, $curr_node->{rend});
	print $curr_node->{acc} . " ($lend, $rend)\n" if $SEE;
	if ($exp_left <= $rend && $exp_right >= $lend) { #overlap
	    $exp_left = &min($exp_left, $lend);
	    $exp_right = &max($exp_right, $rend);
	    print "overlap. New expanded coords: ($exp_left, $exp_right)\n" if $SEE;
	} else {
	    print "No overlap; Creating cluster: " if $SEE;
	    my @cluster;
	    for (my $j=$start_pos; $j < $i; $j++) {
		my $acc = $node_list_aref->[$j]->{acc};
		push (@cluster, $acc);
		print "$acc, " if $SEE;
	    }
	    push (@clusters, [@cluster]);
	    $start_pos = $i;
	    ($exp_left, $exp_right) = ($lend, $rend);
	    print "\nResetting expanded coords: ($lend, $rend)\n" if $SEE;
	}
    }

    print "# Adding final cluster.\n" if $SEE;
    if ($start_pos != $#{$node_list_aref}) {
	print "final cluster: " if $SEE;
	my @cluster;
	for (my $j = $start_pos; $j <= $#{$node_list_aref}; $j++) {
	   my $acc = $node_list_aref->[$j]->{acc};
	   print "$acc, " if $SEE;
	   push (@cluster, $acc);
       }
	push (@clusters, [@cluster]);
	print "\n" if $SEE;
    } else {
	my $acc = $node_list_aref->[$start_pos]->{acc};
	push (@clusters, [$acc]);
	print "adding final $acc.\n" if $SEE;
    }
    return (@clusters);
}

sub min {
    my (@x) = @_;
    @x = sort {$a<=>$b} @x;
    my $min = shift @x;
    return ($min);
}

sub max {
    my @x = @_;
    @x = sort {$a<=>$b} @x;
    my $max = pop @x;
    return ($max);
}

#################################################################
package Cdna_node;
use strict;

sub new {
    my $packagename = shift;
    my ($acc, $lend, $rend) = @_;
    my $self = { acc=>$acc,
		 lend=>$lend,
		 rend=>$rend,
		 myIndex=>undef(),
		 overlapping_indices=>[]
		 };
    bless ($self, $packagename);
    return ($self);
}

1; #EOM