File: Generic.t

package info (click to toggle)
bioperl 1.7.7-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 35,888 kB
  • sloc: perl: 94,151; xml: 14,982; makefile: 20
file content (329 lines) | stat: -rw-r--r-- 9,758 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;

BEGIN {
    use Bio::Root::Test;

    test_begin(-tests => 364);

    use_ok 'Bio::Seq';
    use_ok 'Bio::SeqIO';
    use_ok 'Bio::SeqFeature::Generic';
}


my ($feat, $str, $feat2, $pair, @sft);

my $DEBUG = test_debug();

ok $feat = Bio::SeqFeature::Generic->new(
    -start => 40,
    -end => 80,
    -strand => 1,
);
is $feat->primary_tag, '';
is $feat->source_tag, '';
is $feat->display_name, '';

ok $feat = Bio::SeqFeature::Generic->new(
    -start => 40,
    -end => 80,
    -strand => 1,
    -primary => 'exon',
    -source  => 'internal',
    -display_name => 'my exon feature',
    -tag => {
        silly => 20,
        new => 1
    }
);

is $feat->start, 40, 'Start of feature location';
is $feat->end, 80, 'End of feature location';
is $feat->primary_tag, 'exon', 'Primary tag';
is $feat->source_tag, 'internal', 'Source tag';
is $feat->display_name, 'my exon feature', 'Display name';
is $feat->phase, undef, 'Undef phase by default';
is $feat->phase(1), 1, 'Phase accessor returns';
is $feat->phase, 1, 'Phase is persistent';

ok $feat->gff_string();

ok $feat2 = Bio::SeqFeature::Generic->new(
    -start => 400,
    -end => 440,
    -strand => 1,
    -primary => 'other',
    -source  => 'program_a',
        -phase => 1,
        -tag => {
            silly => 20,
            new => 1
        }
);
is $feat2->phase, 1, 'Set phase from constructor';


# Test attaching a SeqFeature::Generic to a Bio::Seq or SeqFeature::Generic
{
    # Make the parent sequence object
    my $seq = Bio::Seq->new(
        -seq        => 'aaaaggggtttt',
        -display_id => 'test',
        -alphabet   => 'dna',
    );

    # Make a SeqFeature
    ok my $sf1 = Bio::SeqFeature::Generic->new(
        -start  => 4,
        -end    => 9,
        -strand => 1,
    );

    # Add the SeqFeature to the parent
    ok $seq->add_SeqFeature($sf1);

    # Test that it gives the correct sequence
    is $sf1->start, 4, 'Start of first seqfeature';
    is $sf1->end, 9, 'End of first seqfeature';
    is $sf1->strand, 1, 'Strand of first seqfeature';
    ok my $sf_seq1 = $sf1->seq;
    is $sf_seq1->seq, 'aggggt', 'Sequence of first seqfeature';

    # Make a second seqfeature on the opposite strand
    ok my $sf2 = Bio::SeqFeature::Generic->new(
        -start  => 4,
        -end    => 9,
        -strand => -1,
    );

    # Now add the PrimarySeq to the seqfeature before adding it to the parent
    ok $sf2->attach_seq($seq->primary_seq);
    ok $seq->add_SeqFeature($sf2);

    # Test again that we have the correct sequence
    is $sf2->start, 4, 'Start of second seqfeature';
    is $sf2->end, 9, 'End of second seqfeature';
    is $sf2->strand, -1, 'Strand of second seqfeature';
    ok my $sf_seq2 = $sf2->seq;
    is $sf_seq2->seq, 'acccct', 'Sequence of second seqfeature';
}


# Some tests for bug #947

ok my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');

ok $sfeat->add_sub_SeqFeature(
    Bio::SeqFeature::Generic->new(
        -start => 2,
        -end   => 4,
        -primary => 'sub1'
    ),
    'EXPAND'
);

ok $sfeat->add_sub_SeqFeature(
    Bio::SeqFeature::Generic->new(
        -start => 6,
        -end   => 8,
        -primary => 'sub2'
    ),
    'EXPAND'
);

is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';

# Some tests to see if we can set a feature to start at 0
ok $sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );

ok defined $sfeat->start;
is $sfeat->start, 0, 'Can create feature starting and ending at 0';
ok defined $sfeat->end;
is $sfeat->end, 0, 'Can create feature starting and ending at 0';


# Test for bug when Locations are not created explicitly

ok my $feat1 = Bio::SeqFeature::Generic->new(
    -start => 1,
    -end   => 15,
    -strand=> 1
);

ok $feat2 = Bio::SeqFeature::Generic->new(
    -start => 10,
    -end   => 25,
    -strand=> 1
);

ok my $overlap = $feat1->location->union($feat2->location);
is $overlap->start, 1;
is $overlap->end,   25;

ok my $intersect = $feat1->location->intersection($feat2->location);
is $intersect->start, 10;
is $intersect->end,   15;


# Now let's test spliced_seq
ok my $seqio = Bio::SeqIO->new(
    -file => test_input_file('AY095303S1.gbk'),
    -format  => 'genbank'
);
isa_ok $seqio, 'Bio::SeqIO';
ok my $geneseq = $seqio->next_seq;
isa_ok $geneseq, 'Bio::Seq';
ok my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
my $db;

SKIP: {
    test_skip(-tests => 5,
              -requires_modules => [qw(Bio::DB::GenBank)],
              -requires_networking => 1);

    use_ok 'Bio::DB::GenBank';

    $db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
    $CDS->verbose(-1);
    my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);

    is $cdsseq->subseq(1,76),
       'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC';
    is $cdsseq->translate->subseq(1,100),
       'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLK'.
       'APRPTDAPSAIDDAPSTSGLGLGGGVASPR';
    # Test what happens without
    $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
    is $cdsseq->subseq(1,76),
       'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC';
    is $cdsseq->translate->subseq(1,100),
       'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLK'.
       'APRPTDAPSAIDDAPSTSGLGLGGGVASPR';
}

ok $seqio = Bio::SeqIO->new(
    -file => test_input_file('AF032047.gbk'),
    -format  => 'genbank'
);
isa_ok $seqio, 'Bio::SeqIO';
ok $geneseq = $seqio->next_seq;
isa_ok $geneseq, 'Bio::Seq';
ok( ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures );
SKIP: {
    test_skip(-tests => 2,
              -requires_modules => ['Bio::DB::GenBank', 'LWP::Protocol::https'],
              -requires_networking => 1);

    my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
    is $cdsseq->subseq(1,70),
       'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG';
    is $cdsseq->translate->seq,
       'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVE'.
       'HSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*';
}


# Trans-spliced

ok $seqio = Bio::SeqIO->new(
    -format => 'genbank',
    -file => test_input_file('NC_001284.gbk')
);
isa_ok $seqio, 'Bio::SeqIO';
ok my $genome = $seqio->next_seq;

for my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
   ok my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
   chop $spliced; # remove stop codon
   is $spliced, ($cds->get_tag_values('translation'))[0], 'spliced_seq translation matches expected';
}

# Spliced_seq phase
ok my $seq = Bio::SeqIO->new(
    -format => 'fasta',
    -file   => test_input_file('sbay_c127.fas')
)->next_seq;

ok my $sf = Bio::SeqFeature::Generic->new(
    -verbose => -1,
    -start => 263,
    -end => 721,
    -strand => 1,
    -primary => 'splicedgene'
);

ok $sf->attach_seq($seq);

my %phase_check = (
    'TTCAATGACT' => 'FNDFYSMGKS',
    'TCAATGACTT' => 'SMTSIPWVNQ',
    'GTTCAATGAC' => 'VQ*LLFHG*I',
);

for my $phase (-1..3) {
    ok my $sfseq = $sf->spliced_seq(-phase => $phase);
    ok exists $phase_check{$sfseq->subseq(1,10)};
    is $sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check';
}

# Tags
ok $sf->add_tag_value('note','n1');
ok $sf->add_tag_value('note','n2');
ok $sf->add_tag_value('comment','c1');
is_deeply [sort $sf->get_all_tags()], [sort qw(note comment)] , 'Tags found';
is_deeply [sort $sf->get_tagset_values('note')], [sort qw(n1 n2)] , 'get_tagset_values tag values found';
is_deeply [sort $sf->get_tagset_values(qw(note comment))], [sort qw(c1 n1 n2)] , 'get_tagset_values tag values for multiple tags found';
lives_ok {
  is_deeply [sort $sf->get_tag_values('note')], [sort qw(n1 n2)] , 'get_tag_values tag values found';
} 'get_tag_values lives with tag';
lives_ok {
  is_deeply [$sf->get_tagset_values('notag') ], [], 'get_tagset_values no tag values found';
} 'get_tagset_values lives with no tag';
throws_ok { $sf->get_tag_values('notag') } qr/tag value that does not exist/, 'get_tag_values throws with no tag';

# Circular sequence SeqFeature tests
$seq = Bio::SeqIO->new(
    -format => 'genbank',
    -file   => test_input_file('PX1CG.gb')
)->next_seq;

ok $seq->is_circular, 'Phi-X174 genome is circular';

# Retrieving the spliced sequence from any split location requires spliced_seq()

my %sf_data = (
    #       start
    'A'  => [3981, 136, 1, 1542, 'join(3981..5386,1..136)', 'ATGGTTCGTT'],
    'A*' => [4497, 136, 1, 1026, 'join(4497..5386,1..136)', 'ATGAAATCGC'],
    'B'  => [5075, 51,  1, 363,  'join(5075..5386,1..51)',  'ATGGAACAAC'],
);

ok my @split_sfs = grep {
    $_->location->isa('Bio::Location::SplitLocationI')
    } $seq->get_SeqFeatures();

is @split_sfs, 3, 'Only 3 split locations';

for my $sf (@split_sfs) {
    isa_ok $sf->location, 'Bio::Location::SplitLocationI';
    ok my ($tag) = $sf->get_tag_values('product');
    my ($start, $end, $strand, $length, $ftstring, $first_ten) = @{$sf_data{$tag}};

    is $sf->location->to_FTstring, $ftstring, 'Feature string';
    is $sf->spliced_seq->subseq(1,10), $first_ten, 'First ten nucleotides';
    is $sf->strand, $strand, 'Strand';
    is $sf->start, $start, 'Start';
    is $sf->end, $end, 'End';
    is $sf->length, $length, 'Expected length';
}

# spliced_seq() on the reverse strand, bug #88 (github)
$seq = Bio::SeqIO->new( -file => test_input_file('AF222649-rc.gbk') )->next_seq;
# All should start with "ATG"
for my $feat ( $seq->get_SeqFeatures('CDS') ) {
    ok $feat->spliced_seq->seq =~ /^ATG/, "Reverse strand is spliced correctly";
}