File: clone.pm

package info (click to toggle)
libbio-db-gff-perl 1.7.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,216 kB
  • sloc: perl: 9,976; makefile: 2
file content (160 lines) | stat: -rw-r--r-- 4,404 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
=head1 NAME

Bio::DB::GFF::Aggregator::clone -- Clone aggregator

=head1 SYNOPSIS

  use Bio::DB::GFF;

  # Open the sequence database
  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
                                   -dsn     => 'dbi:mysql:elegans42',
				   -aggregator => ['transcript','clone'],
				 );

 ----------------------------------------------------------------------------
 Aggregator method: clone
 Main method:       -none-
 Sub methods:       Clone_left_end Clone_right_end region:Genomic_canonical Sequence:Genomic_canonical
 ----------------------------------------------------------------------------

=head1 DESCRIPTION

Bio::DB::GFF::Aggregator::clone is one of the default aggregators, and
was written to be compatible with the C. elegans GFF files.  It
aggregates raw "Clone_left_end", "Clone_right_end",
"region:Genomic_canonical", and "Sequence:Genomic_canonical" features
into composite features of type "clone".

=cut

package Bio::DB::GFF::Aggregator::clone;
$Bio::DB::GFF::Aggregator::clone::VERSION = '1.7.4';
use strict;


use base qw(Bio::DB::GFF::Aggregator);

=head2 aggregate

 Title   : aggregate
 Usage   : $features = $a->aggregate($features,$factory)
 Function: aggregate a feature list into composite features
 Returns : nothing useful; the list referenced by $features is rewritten in place
 Args    : see L<Bio::DB::GFF::Aggregator>
 Status  : Public

The WormBase GFF model is unusual in that clones aren't identified as
a single feature with start and stop positions, but as two features, a
"left end" and a "right end".  One or both of these features may be
absent.  In order to accommodate this, the aggregator will return undef
for the start and/or stop if one or both of the ends are missing.

=cut

#'

# Build composite "clone" features.  For every group that has a
# Genomic_canonical feature (method "region" or "Sequence"), a copy of that
# feature is retyped to this aggregator's method and given boundaries taken
# from the group's Clone_left_end / Clone_right_end features.  A boundary
# whose end feature is absent is left undefined.
#
# Args:    $features - array ref of features; its contents are replaced
#                      in place with the aggregated list
#          $factory  - passed through to match_sub() and passthru_sub()
# Returns: returns early with nothing when match_sub() yields no matcher;
#          otherwise the value of the final list assignment.  Callers are
#          expected to use the rewritten @$features.
sub aggregate {
  my $self = shift;
  my $features = shift;
  my $factory  = shift;

  my $matchsub    = $self->match_sub($factory) or return;
  my $passthru    = $self->passthru_sub($factory);
  my $method      = $self->get_method;

  my (%clones,%types,@result);
  for my $feature (@$features) {

    if ($feature->group && $matchsub->($feature)) {

      my $feature_method = $feature->method;

      # The alternation must be grouped.  An ungrouped /^region|Sequence$/
      # means "starts with region" OR "ends with Sequence", which would also
      # accept methods such as "regional" or "MySequence".
      if ($feature_method =~ /^(?:region|Sequence)$/ && $feature->source eq 'Genomic_canonical') {
        $clones{$feature->group}{canonical} = $feature;
      } elsif ($feature_method eq 'Clone_left_end') {
        $clones{$feature->group}{left} = $feature;
      } elsif ($feature_method eq 'Clone_right_end') {
        $clones{$feature->group}{right} = $feature;
      }

      # Matched sub-parts are only kept in the output if the passthru
      # filter asks for them.
      push @result,$feature if $passthru && $passthru->($feature);
    } else {
      # Ungrouped or non-matching features are passed along untouched.
      push @result,$feature;
    }
  }

  for my $clone (keys %clones) {
    # Groups that only have end markers produce no composite feature.
    my $canonical = $clones{$clone}{canonical} or next;

    # the genomic_canonical doesn't tell us where the clone starts and stops
    # so don't assume it
    my $duplicate = $canonical->clone;   # make a duplicate of the feature

    # munge the method and source fields; Typename objects are cached per
    # method/source pair so they are shared between composites
    my $source    = $duplicate->source;
    my $type = $types{$method,$source} ||= Bio::DB::GFF::Typename->new($method,$source);
    $duplicate->type($type);

    # On the minus strand the left end supplies 'stop' and the right end
    # supplies 'start'.
    # NOTE(review): the direct hash access below assumes features are
    # hash-based objects with 'start' and 'stop' keys -- confirm.
    my ($start,$stop) = $duplicate->strand > 0 ? ('start','stop') : ('stop','start');
    @{$duplicate}{$start,$stop} =(undef,undef);

    $duplicate->{$start} = $clones{$clone}{left}{$start}  if exists $clones{$clone}{left};
    $duplicate->{$stop}  = $clones{$clone}{right}{$stop}  if exists $clones{$clone}{right};
    $duplicate->method($self->method);
    push @result,$duplicate;
  }

  @$features = @result;
}

=head2 method

 Title   : method
 Usage   : $aggregator->method
 Function: return the method for the composite object
 Returns : the string "clone"
 Args    : none
 Status  : Public

=cut

# Name of the composite feature method produced by this aggregator.
sub method {
  return 'clone';
}

=head2 part_names

 Title   : part_names
 Usage   : $aggregator->part_names
 Function: return the methods for the sub-parts
 Returns : the list ("Clone_left_end", "Clone_right_end", "region:Genomic_canonical", "Sequence:Genomic_canonical")
 Args    : none
 Status  : Public

=cut

# List the feature types that this aggregator consumes as sub-parts.
# Entries are either a bare method name or "method:source".
sub part_names {
  my ($self) = @_;
  return (
    'Clone_left_end',
    'Clone_right_end',
    'region:Genomic_canonical',
    'Sequence:Genomic_canonical',
  );
}

1;

__END__

=head1 BUGS

None reported.


=head1 SEE ALSO

L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>

=head1 AUTHOR

Lincoln Stein E<lt>lstein@cshl.orgE<gt>.

Copyright (c) 2001 Cold Spring Harbor Laboratory.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut