File: CDNA_stitcher.pm

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (289 lines) | stat: -rwxr-xr-x 11,926 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
package main;
our $SEE;

package CDNA::CDNA_stitcher;
use strict;
use CDNA::CDNA_alignment;
use CDNA::Gene_obj_alignment_assembler;  #used for converting gene_obj to alignment.
use Carp;

my $FUZZDIST = 20; #allow FUZZDIST nt extension beyond splice site.

sub new {
    my $packagename = shift;
    my $self = {
	fuzzlength=>$FUZZDIST  #default setting.
	};
    bless ($self, $packagename);
    return ($self);
}


sub set_fuzzlength {
    my $self = shift;
    my $fuzzdist = shift;
    $self->{fuzzlength} = $fuzzdist;
}

sub stitch_alignments {
    my $self = shift;
    my ($template_alignment, $thread_alignment) = @_;

    my $template_acc = $template_alignment->get_acc();
    my $thread_acc = $thread_alignment->get_acc();

    print "Stitching Template:\n$template_acc: " . $template_alignment->toToken() 
        . "\nby\n$thread_acc: " . $thread_alignment->toToken() . "\n\n" if $main::SEE;

    ## don't even think about stitching alignments of opposite spliced orienatations!!!

    my $template_spliced_orient = $template_alignment->get_spliced_orientation();
    my $thread_spliced_orient = $thread_alignment->get_spliced_orientation();

    if ($template_spliced_orient ne '?' && $thread_spliced_orient ne '?') {
        ## both have assigned transcribed orients
        if ($template_spliced_orient ne $thread_spliced_orient) {
            confess "Error, trying to stitch together oppositely transcribed alignments.  That's impossible. ";
        }
    }
    
    
    my $spliced_orient = ($thread_spliced_orient ne '?') ? $thread_spliced_orient : $template_spliced_orient;
    
    my $fuzzlength = $self->{fuzzlength};

    ## Algorithm
    ## -anchor the first thread segment to the template alignment
    ## -determine the boundaries of the matching anchor and first threaded segment
    ## -add all preceding template segments to the stitched alignment if splice compatible.
    ## -add internal cdna alignment segments
    ## -add terminal template alignment segments if splice compatible.

    my @template_segments = $template_alignment->get_alignment_segments();
    my @threaded_segments = $thread_alignment->get_alignment_segments();
    
    my $single_threaded_segment = ($#threaded_segments == 0) ? 1:0; #indicate only a single segment exists.

    my @stitched_segments;
    ## Anchor first threaded segment to the template alignment:
    my ($thread_lend, $thread_rend) = $threaded_segments[0]->get_coords();
    my $anchor_pos = -1;
    for (my $i = 0; $i <= $#template_segments; $i++) {
        my ($lend, $rend) = $template_segments[$i]->get_coords();
        if ($lend < $thread_rend && $rend > $thread_lend) { #overlap
            $anchor_pos = $i;
            last;
        }
    }
    print "Anchoring first cDNA segment. Anchor point: $anchor_pos\n" if $SEE;
    if ($anchor_pos > -1) { #found an anchor point in template alignment.
        
        
        ## add stitched segment
        my $anchor_segment = $template_segments[$anchor_pos];
        my ($anchor_lend, $anchor_rend) = $anchor_segment->get_coords();
        my $first_threaded_segment = $threaded_segments[0];
        my ($thread_lend, $thread_rend) = $first_threaded_segment->get_coords(); 
        # initialize to largest spread
        my ($new_lend) = ($anchor_lend < $thread_lend) ? $anchor_lend : $thread_lend;
        my ($new_rend) = ($anchor_rend > $thread_rend) ? $anchor_rend : $thread_rend;
        # adjust based on splice boundaries
        my ($has_left_splice_junction, $has_right_splice_junction);
        $has_left_splice_junction = $anchor_segment->has_left_splice_junction(); #initialize.
        if ($anchor_segment->has_left_splice_junction() && ($anchor_lend - $thread_lend <= $fuzzlength)) {
            $new_lend = $anchor_lend;
        } elsif ($thread_lend < $anchor_lend) {
            #using threaded segment as initial stitched segment
            $has_left_splice_junction = $first_threaded_segment->has_left_splice_junction();
        }
        
        $has_right_splice_junction = $first_threaded_segment->has_right_splice_junction();
        if ($first_threaded_segment->has_right_splice_junction()) {
            $new_rend = $thread_rend;
        } elsif ($anchor_segment->has_right_splice_junction() && ($thread_rend - $anchor_rend <= $fuzzlength)) {
            $new_rend = $anchor_rend;
            $has_right_splice_junction = $anchor_segment->has_right_splice_junction();
        }
        
        ## Add preceding template segments if current segment is left-splice compatible.
        if ($has_left_splice_junction) {
            print "Adding all template segments before position $anchor_pos.\n" if $SEE;
            ## add all template segments to the stitched alignment preceding the anchor point:
            for (my $i = 0; $i < $anchor_pos; $i++) {
                push (@stitched_segments, $template_segments[$i]->clone());
            }
        }
        
        ## add the current segment.
        my $newsegment = $anchor_segment->clone();
        $newsegment->set_coords($new_lend, $new_rend);
        $newsegment->set_left_splice_junction($has_left_splice_junction);
        $newsegment->set_right_splice_junction($has_right_splice_junction);
        print "adding current segment: lsplice: $has_left_splice_junction, rsplice: $has_right_splice_junction\n" if $SEE;
        push (@stitched_segments, $newsegment);
        
    } else { #no anchor pos, simply add the first threaded segment:
        print "No anchor position, simply adding the first threaded segment.\n" if $SEE;
        push (@stitched_segments, $threaded_segments[0]->clone());
    }
    
    ## Add all but last threaded segments to the stitched alignment:
    
    for (my $i = 1; $i < $#threaded_segments; $i++) {
        print "Adding internal cDNA alignment segment, index: $i\n" if $SEE;
        push (@stitched_segments, $threaded_segments[$i]->clone());
    }
    
    ## Anchor the last threaded segment to a segment within the template model:
    print "Anchoring the terminal segment.\n" if $SEE;
    $anchor_pos = -1;
    my $last_threaded_segment = $threaded_segments[$#threaded_segments];
    ($thread_lend, $thread_rend) = $last_threaded_segment->get_coords();
    # find last template segment overlapping last cdna segment
    for (my $i = $#template_segments; $i >= 0; $i--) {
        my ($lend, $rend) = $template_segments[$i]->get_coords();
        if ($lend < $thread_rend && $rend > $thread_lend) { #overlap
            $anchor_pos = $i;
            last;
        }
    }
    print "Terminal anchor position: $anchor_pos\n" if $SEE;
    if ($anchor_pos > -1) {
        #build composite segment
        my $anchor_segment = $template_segments[$anchor_pos];
        my $has_left_splice_junction = $last_threaded_segment->has_left_splice_junction(); #initialize.
        my $has_right_splice_junction = $anchor_segment->has_right_splice_junction();
        my ($anchor_lend, $anchor_rend) = $anchor_segment->get_coords();
        my $new_lend = ($anchor_lend < $thread_lend) ? $anchor_lend : $thread_lend;
        my $new_rend = ($anchor_rend > $thread_rend) ? $anchor_rend : $thread_rend;
        if ($last_threaded_segment->has_left_splice_junction()) {
            $new_lend = $thread_lend;
        } elsif ($anchor_segment->has_left_splice_junction() && ($anchor_lend - $thread_lend <= $fuzzlength)) {
            $new_lend = $anchor_lend;
            $has_left_splice_junction = $anchor_segment->has_left_splice_junction();
        }
        if ($anchor_segment->has_right_splice_junction() && ($thread_rend - $anchor_rend <= $fuzzlength)) {
            $new_rend = $anchor_rend;
        } else {
            $has_right_splice_junction = $last_threaded_segment->has_right_splice_junction();
        }
        if ($single_threaded_segment) { #only one segment (first == last)
            #just update existing status:
            print "Only one threaded segment, updating it's rend coord and status.\n" if $SEE;
            my $last_stitched_segment = $stitched_segments[$#stitched_segments];
            my ($curr_lend, $curr_rend) = $last_stitched_segment->get_coords();
            $last_stitched_segment->set_coords($curr_lend, $new_rend);
            $last_stitched_segment->set_right_splice_junction($has_right_splice_junction);
        } else {
            my $newsegment = $anchor_segment->clone();
            $newsegment->set_coords($new_lend, $new_rend);
            $newsegment->set_left_splice_junction($has_left_splice_junction);
            $newsegment->set_right_splice_junction($has_right_splice_junction);
            print "adding composite terminal segment. Lsplice: $has_left_splice_junction, Rsplice: $has_right_splice_junction.\n" if $SEE;
            push (@stitched_segments, $newsegment);
        }
    } else {
        print "No matching terminal exon in template.\n" if $SEE;
        if ($single_threaded_segment) {
            print "Single threaded segment remains unchanged.\n" if $SEE;
        } else {
            
            print "Adding last cDNA exon.\n" if $SEE;
            push (@stitched_segments, $last_threaded_segment->clone());
        }
    }
    
    
    ## Add terminal template segments if the last stitched segment contains a splice junction
    
    my $last_stitched_segment =  $stitched_segments[$#stitched_segments];
    if ($last_stitched_segment->has_right_splice_junction()) {
        print "Adding terminal template segments:\n" if $SEE;
        my ($last_lend, $last_rend) = $last_stitched_segment->get_coords();
        $anchor_pos = -1;
        for (my $i = $#template_segments; $i >= 0; $i--) {
            my ($anchor_lend, $anchor_rend) = $template_segments[$i]->get_coords();
            if ($anchor_rend > $last_lend && $anchor_lend < $last_rend) { #overlap
                $anchor_pos = $i;
                last;
            }
        }
        print "Last overlapping alignment segment in index found at: $anchor_pos\n" if $SEE;
        if ($anchor_pos > -1) {
            for (my $i = $anchor_pos + 1; $i <= $#template_segments; $i++) {
                print "\tadding terminal template segment index: $i\n" if $SEE;
                push (@stitched_segments, $template_segments[$i]->clone());
            }
        }
    }
    
    
    ## Done stitching segments together.  Create new alignment:
    my $cdna_length = 0;
    foreach my $segment (@stitched_segments) {
        my $length = $segment->get_length();
        $cdna_length += $length;
    }
    
    my $seq_ref = $template_alignment->get_genomic_seq_ref();
    
    my $stitched_alignment = new CDNA::CDNA_alignment ($cdna_length, \@stitched_segments, $seq_ref); #aligned orientation auto assigned.
    
    $stitched_alignment->set_spliced_orientation($spliced_orient);
    $stitched_alignment->set_acc($template_alignment->get_acc());
    
    print "Stitched alignment: " . $stitched_alignment->toToken() . "\n" if $SEE;
    
    return ($stitched_alignment);
}



sub stitch_alignment_into_gene {
    my $self = shift;
    my ($gene_obj, $cdna_obj, $sequence_ref) = @_;
    
    my $gene_obj_assembler = new CDNA::Gene_obj_alignment_assembler($sequence_ref);
    my $gene_based_cdna_obj = $gene_obj_assembler->gene_obj_to_cdna_alignment($gene_obj);
    
    my $stitched_alignment = $self->stitch_alignments($gene_based_cdna_obj, $cdna_obj);
    $stitched_alignment->set_fli(1);
    
    my $partials_href = $self->_analyze_partial_status($gene_obj);
    my $stitched_gene_obj = $stitched_alignment->get_gene_obj_via_alignment($partials_href);
    
    return ($stitched_gene_obj);
    
}


#### 
sub _analyze_partial_status {
    my $self = shift;
    my ($gene_obj) = shift;
    my %partials;
    if ($gene_obj->is_5prime_partial()) {
        $partials{"5prime"} = 1;
    }
    if ($gene_obj->is_3prime_partial()) {
        $partials{"3prime"} = 1;
    }
    return (\%partials);
}




1; #EOM