File: MapTileUtils.pm

package info (click to toggle)
bioperl 1.7.8-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 35,964 kB
  • sloc: perl: 94,019; xml: 14,811; makefile: 15
file content (763 lines) | stat: -rw-r--r-- 21,011 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
#$Id$
package Bio::Search::Tiling::MapTileUtils;
$Bio::Search::Tiling::MapTileUtils::VERSION = '1.7.8';

use strict;
use warnings;
use Exporter;

BEGIN {
    our @ISA = qw( Exporter );
    our @EXPORT = qw( get_intervals_from_hsps 
                      interval_tiling 
                      decompose_interval
                      containing_hsps
                      covering_groups
                      _allowable_filters 
                      _set_attributes
                      _mapping_coeff);
}

# tiling trials
# assumed: intervals are [$a0, $a1], with $a0 <= $a1
=head1 NAME

Bio::Search::Tiling::MapTileUtils - utilities for manipulating closed intervals for an HSP tiling algorithm

=head1 SYNOPSIS

Not used directly.

=head1 DESCRIPTION

Not used directly.

=head1 NOTE

An "interval" in this module is defined as an arrayref C<[$a0, $a1]>, where
C<$a0, $a1> are scalar numbers satisfying C<$a0 E<lt>= $a1>.

=head1 AUTHOR

Mark A. Jensen - maj -at- fortinbras -dot- us

=head1 APPENDIX

=head2 interval_tiling    

 Title   : interval_tiling()
 Usage   : @tiling = interval_tiling( \@array_of_intervals )
 Function: Find minimal set of intervals covering the input set
 Returns : array of arrayrefs of the form
  ( [$interval => [ @indices_of_collapsed_input_intervals ]], ...)
 Args    : arrayref of intervals

=cut

sub interval_tiling {
    return unless $_[0]; # no input
    my $n = scalar @{$_[0]};
    my %input;
    @input{(0..$n-1)} = @{$_[0]};
    my @active = (0..$n-1);
    my @hold;
    my @tiled_ints;
    my @ret;
    while (@active) {
	my $tgt = $input{my $tgt_i = shift @active};
	push @tiled_ints, $tgt_i;
	my $tgt_not_disjoint = 1;
	while ($tgt_not_disjoint) {
	    $tgt_not_disjoint = 0;
	    while (my $try_i = shift @active) {
		my $try = $input{$try_i};
		if ( !are_disjoint($tgt, $try) ) {
		    $tgt = min_covering_interval($tgt,$try);
		    push @tiled_ints, $try_i;
		    $tgt_not_disjoint = 1;
		}
		else {
		    push @hold, $try_i;
		}
	    }
	    if (!$tgt_not_disjoint) {
		push @ret, [ $tgt => [@tiled_ints] ];
		@tiled_ints = ();
	    }
	    @active = @hold;
	    @hold = ();
	}
    }
    return @ret;
}

=head2 decompose_interval

 Title   : decompose_interval
 Usage   : @decomposition = decompose_interval( \@overlappers )
 Function: Calculate the disjoint decomposition of a set of
           overlapping intervals, each annotated with a list of
           covering intervals
 Returns : array of arrayrefs of the form
           ( [[@interval] => [@indices_of_coverers]], ... )
 Args    : arrayref of intervals (arrayrefs like [$a0, $a1], with
 Note    : Each returned interval is associated with a list of indices of the
           original intervals that cover that decomposition component
           (scalar size of this list could be called the 'coverage coefficient')
 Note    : Coverage: each component of the decomp is completely contained
           in the input intervals that overlap it, by construction.
 Caveat  : This routine expects the members of @overlappers to overlap,
           but doesn't check this.

=cut

### what if the input intervals don't overlap?? They MUST overlap; that's
### what interval_tiling() is for.

sub decompose_interval {
    return unless $_[0]; # no input
    my @ints = @{$_[0]};
    my (%flat,@flat);
    ### this is ok, but need to handle the case where a lh and rh endpoint
    ### coincide...
    # decomposition --
    # flatten:
    # every lh endpoint generates (lh-1, lh)
    # every rh endpoint generates (rh, rh+)
    foreach (@ints) {
	$flat{$$_[0]-1}++;
	$flat{$$_[0]}++;
	$flat{$$_[1]}++;
	$flat{$$_[1]+1}++;
    }
    # sort, create singletons if nec.
    my @a;
    @a = sort {$a<=>$b} keys %flat;
    # throw out first and last (meeting a boundary condition)
    shift @a; pop @a;
    # look for singletons
    @flat = (shift @a, shift @a);
    if ( $flat[1]-$flat[0] == 1 ) {
	@flat = ($flat[0],$flat[0], $flat[1]);
    }
    while (my $a = shift @a) {
	if ($a-$flat[-2]==2) {
	    push @flat, $flat[-1]; # create singleton interval
	}
	push @flat, $a;
    }
    if ($flat[-1]-$flat[-2]==1 and @flat % 2) {
	push @flat, $flat[-1];
    }
    # component intervals are consecutive pairs
    my @decomp;
    while (my $a = shift @flat) {
	push @decomp, [$a, shift @flat];
    }

    # for each component, return a list of the indices of the input intervals
    # that cover the component.
    my @coverage;
    foreach my $i (0..$#decomp) {
	foreach my $j (0..$#ints) {
	    unless (are_disjoint($decomp[$i], $ints[$j])) {
		if (defined $coverage[$i]) {
		    push @{$coverage[$i]}, $j;
		}
		else {
		    $coverage[$i] = [$j];
		}
	    }
	}
    }
    return map { [$decomp[$_] => $coverage[$_]] } (0..$#decomp);
}    

=head2 are_disjoint

 Title   : are_disjoint
 Usage   : are_disjoint( [$a0, $a1], [$b0, $b1] )
 Function: Determine if two intervals are disjoint
 Returns : True if the intervals are disjoint, false if they overlap
 Args    : array of two intervals

=cut

sub are_disjoint {
    my ($int1, $int2) = @_;
    return 1 if ( $$int1[1] < $$int2[0] ) || ( $$int2[1] < $$int1[0]);
    return 0;
}

=head2 min_covering_interval

 Title   : min_covering_interval 
 Usage   : $interval = min_covering_interval( [$a0,$a1],[$b0,$b1] )
 Function: Determine the minimal covering interval for two intervals
 Returns : an interval
 Args    : two intervals

=cut

sub min_covering_interval {
    my ($int1, $int2) = @_;
    my @a = sort {$a <=> $b} (@$int1, @$int2);
    return [$a[0],$a[-1]];
}

=head2 get_intervals_from_hsps

 Title   : get_intervals_from_hsps
 Usage   : @intervals = get_intervals_from_hsps($type, @hsp_objects)
 Function: Return array of intervals of the form [ $start, $end ],
           from an array of hsp objects
 Returns : an array of intervals
 Args    : scalar $type, array of HSPI objects; where $type is one of 'hit',
           'subject', 'query'

=cut

sub get_intervals_from_hsps {
    my $type = shift;
    my @hsps;
    if (ref($type) =~ /HSP/) {
	push @hsps, $type;
	$type = 'query';
    }
    elsif ( !grep /^$type$/,qw( hit subject query ) ) {
	die "Unknown HSP type '$type'";
    }
    $type = 'hit' if $type eq 'subject';
    push @hsps, @_;
    my @ret;
    foreach (@hsps) {
#	push @ret, [ $_->start($type), $_->end($type)];
	push @ret, [ $_->range($type) ];
    }
    return @ret;
}

# fast, clear, nasty, brutish and short.
# for _allowable_filters(), _set_mapping()
# covers BLAST, FAST families
# FASTA is ambiguous (nt or aa) based on alg name only

my $alg_lookup = {
    'N'  => { 'mapping' => [1,1],
              'def_context' => ['p_','p_'],
	      'has_strand' => [1, 1],
              'has_frame' => [0, 0]},
    'P'  => { 'mapping' => [1,1],
              'def_context' => ['all','all'],
	      'has_strand' => [0, 0],
	      'has_frame' => [0, 0]},
    'X'  => { 'mapping' => [3, 1],
              'def_context' => [undef,'all'],
	      'has_strand' => [1, 0],
	      'has_frame' => [1, 0]}, 
    'Y'  => { 'mapping' => [3, 1],
              'def_context' => [undef,'all'],
	      'has_strand' => [1, 0],
	      'has_frame' => [1, 0]}, 
    'TA' => { 'mapping' => [1, 3],
              'def_context' => ['all',undef],
	      'has_strand' => [0, 1],
	      'has_frame' => [0, 1]}, 
    'TN' => { 'mapping' => [1, 3],
              'def_context' => ['p_',undef],
	      'has_strand' => [1,1],
	      'has_frame' => [0, 1]}, 
    'TX' => { 'mapping' => [3, 3],
              'def_context' => [undef,undef],
	      'has_strand' => [1, 1],
	      'has_frame' => [1, 1]}, 
    'TY' => { 'mapping' => [3, 3],
              'def_context' => [undef,undef],
	      'has_strand' => [1, 1],
	      'has_frame' => [1, 1]}
};
   
=head2 _allowable_filters
    
 Title   : _allowable_filters
 Usage   : _allowable_filters($Bio_Search_Hit_HitI, $type)
 Function: Return the HSP filters (strand, frame) allowed, 
           based on the reported algorithm
 Returns : String encoding allowable filters: 
           s = strand, f = frame
           Empty string if no filters allowed
           undef if algorithm unrecognized
 Args    : A Bio::Search::Hit::HitI object,
           scalar $type, one of 'hit', 'subject', 'query';
           default is 'query'

=cut

sub _allowable_filters {
    my $hit = shift;
    my $type = shift;
    $type ||= 'q';
    unless (grep /^$type$/, qw( h q s ) ) {
	warn("Unknown type '$type'; returning ''");
	return '';
    }
    $type = 'h' if $type eq 's';
    my $alg = $hit->algorithm;

    # pretreat (i.e., kludge it)
    $alg =~ /^RPS/ and ($alg) = ($alg =~ /\(([^)]+)\)/);

    for ($hit->algorithm) {
	/MEGABLAST/i && do {
	    return qr/[s]/;
	};
	/(.?)BLAST(.?)/i && do {
	    return $$alg_lookup{$1.$2}{$type};
	};
	/(.?)FAST(.?)/ && do {
	    return $$alg_lookup{$1.$2}{$type};
	};
	do { # unrecognized
	    last;
	};
    }
    return;
}


=head2 _set_attributes

 Title   : _set_attributes
 Usage   : $tiling->_set_attributes()
 Function: Sets attributes for invocant
           that depend on algorithm name
 Returns : True on success
 Args    : none
 Note    : setting based on the configuration table
           %alg_lookup

=cut

sub _set_attributes {
    my $self = shift;
    my $alg = $self->hit->algorithm;

    # pretreat (i.e., kludge it)
    $alg =~ /^RPS/ and ($alg) = ($alg =~ /\(([^)]+)\)/);
    
    for ($alg) {
	/MEGABLAST/i && do {
	    ($self->{_mapping_query},$self->{_mapping_hit}) = (1,1);
	    ($self->{_def_context_query},$self->{_def_context_hit}) =
		('p_','p_');
	    ($self->{_has_frame_query},$self->{_has_frame_hit}) =
		(0, 0);
	    ($self->{_has_strand_query},$self->{_has_strand_hit}) =
		(1, 1);
	    last;
	};
	/(.?)BLAST(.?)/i && do {
	    ($self->{_mapping_query},$self->{_mapping_hit}) = 
		@{$$alg_lookup{$1.$2}{mapping}};
	    ($self->{_def_context_query},$self->{_def_context_hit}) =
		@{$$alg_lookup{$1.$2}{def_context}};
	    ($self->{_has_frame_query},$self->{_has_frame_hit}) =	    
		@{$$alg_lookup{$1.$2}{has_frame}};
	    ($self->{_has_strand_query},$self->{_has_strand_hit}) =	    
		@{$$alg_lookup{$1.$2}{has_strand}};
	    last;
	};
	/(.?)FAST(.?)/ && do {
	    ($self->{_mapping_query},$self->{_mapping_hit}) = 
		@{$$alg_lookup{$1.$2}{mapping}};
	    ($self->{_def_context_query},$self->{_def_context_hit}) =
		@{$$alg_lookup{$1.$2}{def_context}};
	    ($self->{_has_frame_query},$self->{_has_frame_hit}) =	    
		@{$$alg_lookup{$1.$2}{has_frame}};
	    ($self->{_has_strand_query},$self->{_has_strand_hit}) =	    
		@{$$alg_lookup{$1.$2}{has_strand}};
	    last;
	};
	do { # unrecognized
	    $self->warn("Unrecognized algorithm '$alg'; defaults may not work");
	    ($self->{_mapping_query},$self->{_mapping_hit}) = (1,1);
	    ($self->{_def_context_query},$self->{_def_context_hit}) =
		('all','all');
	    ($self->{_has_frame_query},$self->{_has_frame_hit}) =	    
		(0,0);
	    ($self->{_has_strand_query},$self->{_has_strand_hit}) =	    
		(0,0);
	    return 0;
	    last;
	};
    }
    return 1;
}
           
sub _mapping_coeff {
    my $obj = shift;
    my $type = shift;
    my %type_i = ( 'query' => 0, 'hit' => 1 );
    unless ( ref($obj) && $obj->can('algorithm') ) {
	$obj->warn("Object type unrecognized");
	return undef;
    }
    $type ||= 'query';
    unless ( grep(/^$type$/, qw( query hit subject ) ) ) {
	$obj->warn("Sequence type unrecognized");
	return undef;
    }
    $type = 'hit' if $type eq 'subject';
    my $alg = $obj->algorithm;

    # pretreat (i.e., kludge it)
    $alg =~ /^RPS/ and ($alg) = ($alg =~ /\(([^)]+)\)/);
    
    for ($alg) {
	/MEGABLAST/i && do {
	    return 1;
	};
	/(.?)BLAST(.?)/i && do {
	    return $$alg_lookup{$1.$2}{'mapping'}[$type_i{$type}];
	};
	/(.?)FAST(.?)/ && do {
	    return $$alg_lookup{$1.$2}{'mapping'}[$type_i{$type}];
	};
	do { # unrecognized
	    last;
	};
    }
    return;
}

# a graphical depiction of a set of intervals
sub _ints_as_text {
    my $ints = shift;
    my @ints = @$ints;
    my %pos;
    for (@ints) {
	$pos{$$_[0]}++;
	$pos{$$_[1]}++;
    }
    
    my @pos = sort {$a<=>$b} keys %pos;
    @pos = map {sprintf("%03d",$_)} @pos;
#header
    my $max=0;
    $max = (length > $max) ? length : $max for (@pos);
    for my $j (0..$max-1) {
	my $i = $max-1-$j; 
	my @line = map { substr($_, $j, 1) || '0' } @pos;
	print join('', @line), "\n";
    }
    print '-' x @pos, "\n";
    undef %pos;
    @pos{map {sprintf("%d",$_)} @pos} = (0..@pos);
    foreach (@ints) {
	print ' ' x $pos{$$_[0]}, '[', ' ' x ($pos{$$_[1]}-$pos{$$_[0]}-1), ']', ' ' x (@pos-$pos{$$_[1]}), "\n";
    }
}



=head2 containing_hsps()

 Title   : containing_hsps
 Usage   : @hsps = containing_hsps($interval, @hsps_to_search)
 Function: Return a list of hsps whose coordinates completely contain the
           given $interval
 Returns : Array of HSP objects
 Args    : $interval : [$int1, $int2],
           array of HSP objects

=cut
# could be more efficient if hsps are assumed ordered...
sub containing_hsps {
    my $intvl = shift;
    my @hsps = @_;
    my @ret;
    my ($beg, $end) = @$intvl;
    foreach my $hsp (@hsps) {
	my ($start, $stop) = ($hsp->start, $hsp->end);
	push @ret, $hsp if ( $start <= $beg and $end <= $stop );
    }
    return @ret;
}



=head2 covering_groups()

 Title   : covering_groups
 Usage   : 
 Function: divide a list of **ordered,disjoint** intervals (as from a 
           coverage map) into a set of disjoint covering groups
 Returns : array of arrayrefs, each arrayref a covering set of 
           intervals
 Args    : array of intervals

=cut

sub covering_groups {
    my @intervals = @_;
    return unless @intervals;
    my (@groups, $grp);
    push @{$groups[0]}, shift @intervals;
    $grp = $groups[0];
    for (my $intvl = shift @intervals; @intervals; $intvl = shift @intervals) {
	if ( $intvl->[0] - $grp->[-1][1] == 1 ) { # intervals are direcly adjacent
	    push @$grp, $intvl;
	}
	else {
	    $grp = [$intvl];
	    push @groups, $grp;
	}
    }
    return @groups;
}

1;
# need our own subsequencer for hsps. 

package Bio::Search::HSP::HSPI;
$Bio::Search::HSP::HSPI::VERSION = '1.7.8';


use strict;
use warnings;

=head2 matches_MT

 Title   : matches_MT
 Usage   : $hsp->matches($type, $action, $start, $end)
 Purpose   : Get the total number of identical or conserved matches 
             in the query or sbjct sequence for the given HSP. Optionally can
             report data within a defined interval along the seq.
 Returns   : scalar int 
 Args      : 
 Comments  : Relies on seq_str('match') to get the string of alignment symbols
             between the query and sbjct lines which are used for determining
             the number of identical and conservative matches.
 Note      : Modeled on Bio::Search::HSP::HSPI::matches

=cut

sub matches_MT {
    use integer;
    my( $self, @args ) = @_;
    my($type, $action, $beg, $end) = $self->_rearrange( [qw(TYPE ACTION START END)], @args);
    my @actions = qw( identities conserved searchutils );
    
    # prep $type
    $self->throw("Type not specified") if !defined $type;
    $self->throw("Type '$type' unrecognized") unless grep(/^$type$/,qw(query hit subject));
    $type = 'hit' if $type eq 'subject';

    # prep $action
    $self->throw("Action not specified") if !defined $action;
    $self->throw("Action '$action' unrecognized") unless grep(/^$action$/, @actions);

    my ($len_id, $len_cons);
    my $c = Bio::Search::Tiling::MapTileUtils::_mapping_coeff($self, $type);
    if ((defined $beg && !defined $end) || (!defined $beg && defined $end)) {
	$self->throw("Both start and end are required");
    }
    elsif ( (!defined($beg) && !defined($end)) || !$self->seq_str('match') ) {
        ## Get data for the whole alignment.
	# the reported values x mapping 
	$self->debug("Sequence data not present in report; returning data for entire HSP") unless $self->seq_str('match');
	($len_id, $len_cons) = map { $c*$_ } ($self->num_identical, $self->num_conserved);
	for ($action) {
	    $_ eq 'identities'  && do {
		return $len_id;
	    };
	    $_ eq 'conserved'   && do {
		return $len_cons;
	    };
	    $_ eq 'searchutils' && do {
		return ($len_id, $len_cons);
	    };
	    do {
		$self->throw("What are YOU doing here?");
	    };
	}
    }
    else {
        ## Get the substring representing the desired sub-section of aln.
        my($start,$stop) = $self->range($type);
	if ( $beg < $start or $stop < $end ) {
	    $self->throw("Start/stop out of range [$start, $stop]");
	}

	# handle gaps
	my $match_str = $self->seq_str('match');
	if ($self->gaps) {
	    # strip the homology string of gap positions relative
	    # to the target type
	    $match_str = $self->seq_str('match');
	    my $tgt   = $self->seq_str($type);
	    my $encode = $match_str ^ $tgt;
	    my $zap = '-'^' ';
	    $encode =~ s/$zap//g;
	    $tgt =~ s/-//g;
	    $match_str = $tgt ^ $encode;
	    # match string is now the correct length for substr'ing below,
	    # given that start and end are gapless coordinates in the 
	    # blast report
	}

        my $seq = "";
	$seq = substr( $match_str, 
		       int( ($beg-$start)/Bio::Search::Tiling::MapTileUtils::_mapping_coeff($self, $type) ),
		       int( 1+($end-$beg)/Bio::Search::Tiling::MapTileUtils::_mapping_coeff($self, $type) ) 
	    );

        if(!CORE::length $seq) {
            $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop");
        }

        $seq =~ s/ //g;  # remove space (no info).
        $len_cons = (CORE::length $seq)*(Bio::Search::Tiling::MapTileUtils::_mapping_coeff($self,$type));
        $seq =~ s/\+//g;  # remove '+' characters (conservative substitutions)
        $len_id = (CORE::length $seq)*(Bio::Search::Tiling::MapTileUtils::_mapping_coeff($self,$type));
	for ($action) {
	    $_ eq 'identities' && do {
		return $len_id;
	    };
	    $_ eq 'conserved' && do {
		return $len_cons;
	    };
	    $_ eq 'searchutils' && do {
		return ($len_id, $len_cons);
	    };
	    do {
		$self->throw("What are YOU doing here?");
	    };
	}
    }
}

1;

package Bio::LocatableSeq;
$Bio::LocatableSeq::VERSION = '1.7.8';

use strict;
use warnings;

# mixin the Bio::FeatureHolderI implementation of 
# Bio::Seq -- for get_tiled_aln

=head2 get_SeqFeatures

 Title   : get_SeqFeatures
 Usage   :
 Function: Get the feature objects held by this feature holder.

           Features which are not top-level are subfeatures of one or
           more of the returned feature objects, which means that you
           must traverse the subfeature arrays of each top-level
           feature object in order to traverse all features associated
           with this sequence.

           Top-level features can be obtained by tag, specified in 
           the argument.

           Use get_all_SeqFeatures() if you want the feature tree
           flattened into one single array.

 Example :
 Returns : an array of Bio::SeqFeatureI implementing objects
 Args    : [optional] scalar string (feature tag)


=cut

sub get_SeqFeatures{
   my $self = shift;
   my $tag = shift;

   if( !defined $self->{'_as_feat'} ) {
       $self->{'_as_feat'} = [];
   }
   if ($tag) {
       return map { $_->primary_tag eq $tag ? $_ : () } @{$self->{'_as_feat'}};
   }
   else {
       return @{$self->{'_as_feat'}};
   }
}

=head2 feature_count

 Title   : feature_count
 Usage   : $seq->feature_count()
 Function: Return the number of SeqFeatures attached to a sequence
 Returns : integer representing the number of SeqFeatures
 Args    : None


=cut

sub feature_count {
    my ($self) = @_;
    if (defined($self->{'_as_feat'})) {
	return ($#{$self->{'_as_feat'}} + 1);
    } else {
	return 0;
    }
}

=head2 add_SeqFeature

 Title   : add_SeqFeature
 Usage   : $seq->add_SeqFeature($feat);
           $seq->add_SeqFeature(@feat);
 Function: Adds the given feature object (or each of an array of feature
           objects to the feature array of this
           sequence. The object passed is required to implement the
           Bio::SeqFeatureI interface.
 Returns : 1 on success
 Args    : A Bio::SeqFeatureI implementing object, or an array of such objects.


=cut

sub add_SeqFeature {
   my ($self,@feat) = @_;
   $self->{'_as_feat'} = [] unless $self->{'_as_feat'};
   foreach my $feat ( @feat ) {
       if( !$feat->isa("Bio::SeqFeatureI") ) {
	   $self->throw("$feat is not a SeqFeatureI and that's what we expect...");
       }
       $feat->attach_seq($self);
       push(@{$self->{'_as_feat'}},$feat);
   }
   return 1;
}

=head2 remove_SeqFeatures

 Title   : remove_SeqFeatures
 Usage   : $seq->remove_SeqFeatures();
 Function: Flushes all attached SeqFeatureI objects.

           To remove individual feature objects, delete those from the returned
           array and re-add the rest.
 Example :
 Returns : The array of Bio::SeqFeatureI objects removed from this seq.
 Args    : None


=cut

sub remove_SeqFeatures {
    my $self = shift;

    return () unless $self->{'_as_feat'};
    my @feats = @{$self->{'_as_feat'}};
    $self->{'_as_feat'} = [];
    return @feats;
}

1;