File: LocationFactory.t

package info (click to toggle)
bioperl 1.6.1-2
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 40,768 kB
  • ctags: 12,005
  • sloc: perl: 174,299; xml: 13,923; sh: 1,941; lisp: 1,803; asm: 109; makefile: 53
file content (153 lines) | stat: -rw-r--r-- 6,630 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# -*-Perl-*- Test Harness script for Bioperl
# $Id: LocationFactory.t 15635 2009-04-14 19:11:13Z cjfields $

use strict;

BEGIN {
    use lib '.';
    use Bio::Root::Test;
    
    test_begin(-tests => 272);
	
    use_ok('Bio::Factory::FTLocationFactory');
}

my $simple_impl = "Bio::Location::Simple";
my $fuzzy_impl = "Bio::Location::Fuzzy";
my $split_impl = "Bio::Location::Split";

# Holds strings and results. The latter is an array of expected class name,
# min/max start position and position type, min/max end position and position
# type, location type, the number of locations, and the strand.
#
my %testcases = (
   # note: the following are directly taken from 
   # http://www.ncbi.nlm.nih.gov/collab/FT/#location
   "467" => [$simple_impl,
	    467, 467, "EXACT", 467, 467, "EXACT", "EXACT", 1, 1],
	"340..565" => [$simple_impl,
		 340, 340, "EXACT", 565, 565, "EXACT", "EXACT", 1, 1],
	"<345..500" => [$fuzzy_impl,
		 undef, 345, "BEFORE", 500, 500, "EXACT", "EXACT", 1, 1],
	"<1..888" => [$fuzzy_impl,
		 undef, 1, "BEFORE", 888, 888, "EXACT", "EXACT", 1, 1],
	"(102.110)" => [$fuzzy_impl,
		 102, 102, "EXACT", 110, 110, "EXACT", "WITHIN", 1, 1],
	"(23.45)..600" => [$fuzzy_impl,
		 23, 45, "WITHIN", 600, 600, "EXACT", "EXACT", 1, 1],
	"(122.133)..(204.221)" => [$fuzzy_impl,
		 122, 133, "WITHIN", 204, 221, "WITHIN", "EXACT", 1, 1],
	"123^124" => [$simple_impl,
		 123, 123, "EXACT", 124, 124, "EXACT", "IN-BETWEEN", 1, 1],
	"145^177" => [$fuzzy_impl,
		 145, 145, "EXACT", 177, 177, "EXACT", "IN-BETWEEN", 1, 1],
	"join(12..78,134..202)" => [$split_impl,
		 12, 12, "EXACT", 202, 202, "EXACT", "EXACT", 2, 1],
	"complement(join(4918..5163,2691..4571))" => [$split_impl,
		 2691, 2691, "EXACT", 5163, 5163, "EXACT", "EXACT", 2, -1],
	"complement(34..(122.126))" => [$fuzzy_impl,
		 34, 34, "EXACT", 122, 126, "WITHIN", "EXACT", 1, -1],
	"J00194:100..202" => [$simple_impl,
		 100, 100, "EXACT", 202, 202, "EXACT", "EXACT", 1, 1],
	# this variant is not really allowed by the FT definition
	# document but we want to be able to cope with it
	"J00194:(100..202)" => [$simple_impl,
		 100, 100, "EXACT", 202, 202, "EXACT", "EXACT", 1, 1],
	"((122.133)..(204.221))" => [$fuzzy_impl,
		 122, 133, "WITHIN", 204, 221, "WITHIN", "EXACT", 1, 1],
	"join(AY016290.1:108..185,AY016291.1:1546..1599)"=> [$split_impl,
		 108, 108, "EXACT", 185, 185, "EXACT", "EXACT", 2, undef],

	# UNCERTAIN locations and positions (Swissprot)
   "?2465..2774" => [$fuzzy_impl,
       2465, 2465, "UNCERTAIN", 2774, 2774, "EXACT", "EXACT", 1, 1],
   "22..?64" => [$fuzzy_impl,
       22, 22, "EXACT", 64, 64, "UNCERTAIN", "EXACT", 1, 1],
   "?22..?64" => [$fuzzy_impl,
       22, 22, "UNCERTAIN", 64, 64, "UNCERTAIN", "EXACT", 1, 1],
   "?..>393" => [$fuzzy_impl,
       undef, undef, "UNCERTAIN", 393, undef, "AFTER", "UNCERTAIN", 1, 1],
   "<1..?" => [$fuzzy_impl,
       undef, 1, "BEFORE", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
   "?..536" => [$fuzzy_impl,
       undef, undef, "UNCERTAIN", 536, 536, "EXACT", "UNCERTAIN", 1, 1],
   "1..?" => [$fuzzy_impl,
       1, 1, "EXACT", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
   "?..?" => [$fuzzy_impl,
       undef, undef, "UNCERTAIN", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
   # Not working yet:
   #"12..?1" => [$fuzzy_impl,
   #    1, 1, "UNCERTAIN", 12, 12, "EXACT", "EXACT", 1, 1]
		 );

my $locfac = Bio::Factory::FTLocationFactory->new();
isa_ok($locfac,'Bio::Factory::LocationFactoryI');

# sorting is to keep the order constant from one run to the next
foreach my $locstr (keys %testcases) { 
	my $loc = $locfac->from_string($locstr);
	if($locstr eq "join(AY016290.1:108..185,AY016291.1:1546..1599)") {
		$loc->seq_id("AY016295.1");
	}
	my @res = @{$testcases{$locstr}};
	is(ref($loc), $res[0], $res[0]);
	is($loc->min_start(), $res[1]);
	is($loc->max_start(), $res[2]);
	is($loc->start_pos_type(), $res[3]);
	is($loc->min_end(), $res[4]);
	is($loc->max_end(), $res[5]);
	is($loc->end_pos_type(), $res[6]);
	is($loc->location_type(), $res[7]);
	my @locs = $loc->each_Location();
	is(@locs, $res[8]);
	my $ftstr = $loc->to_FTstring();
	# this is a somewhat ugly hack, but we want clean output from to_FTstring()
	# Umm, then these should really fail, correct?
	# Should we be engineering workarounds for tests?
	$locstr = "J00194:100..202" if $locstr eq "J00194:(100..202)";
	$locstr = "(122.133)..(204.221)" if $locstr eq "((122.133)..(204.221))";
	# now test
	is($ftstr, $locstr, "Location String: $locstr");
	# test strand production
	is($loc->strand(), $res[9]);
}

SKIP: {
    skip('nested matches in regex only supported in v5.6.1 and higher', 5) unless $^V gt v5.6.0;
    
	# bug #1674, #1765, 2101
	# EMBL-like 
	# join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))
	# GenBank-like
	# join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))
	# Note that
	# join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)
	# is the same as
	# join(1000..2000,3000..4000,5000..6000,7000..8000,9000..10000)
	# But I don't want to bother with it at this point
	my @expected = (# intentionally testing same expected string twice
					# as I am providing two different encodings
					# that should mean the same thing
	'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
	'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
	# ditto
	'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
	'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
	# this is just seen once
	'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)',
	'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
   );

	for my $locstr (
		'join(11025..11049,join(complement(239890..240081),complement(241499..241580),complement(251354..251412),complement(315036..315294)))',
		'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
		'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
		'join(20464..20694,21548..22763,join(complement(231520..231669),complement(232596..232990),complement(314652..314672)))',
		'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)',
		'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
	   ) {
		my $loc = $locfac->from_string($locstr);
		my $ftstr = $loc->to_FTstring();
		is($ftstr, shift @expected, $locstr);
	}
}