File: hmmer.t

package info (click to toggle)
bioperl 1.6.1-2
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 40,768 kB
  • ctags: 12,005
  • sloc: perl: 174,299; xml: 13,923; sh: 1,941; lisp: 1,803; asm: 109; makefile: 53
file content (176 lines) | stat: -rw-r--r-- 9,526 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# -*-Perl-*- Test Harness script for Bioperl
# $Id: SearchIO_hmmer.t 14989 2008-11-11 19:52:02Z cjfields $

use strict;

BEGIN {     
    use lib '.';
    use Bio::Root::Test;
    
    test_begin(-tests => 116);
	
	use_ok('Bio::SearchIO');
}

my $searchio = Bio::SearchIO->new(-format => 'hmmer',
				 -file   => test_input_file('hmmpfam.out'));

while( my $result = $searchio->next_result ) {
    is(ref($result),'Bio::Search::Result::HMMERResult');
    is($result->algorithm, 'HMMPFAM');
    is($result->algorithm_version, '2.1.1');
    is($result->hmm_name, 'pfam');
    is($result->sequence_file, '/home/birney/src/wise2/example/road.pep');
    is($result->query_name, 'roa1_drome');
    is($result->query_description, '');
    is($result->num_hits(), 2);
    my ($hsp,$hit);
    if( $hit = $result->next_model ) {
	is($hit->name, 'SEED');
	is($hit->raw_score, '146.1');
	float_is($hit->significance, 6.3e-40);
	is(ref($hit), 'Bio::Search::Hit::HMMERHit');
	is($hit->num_hsps, 1);

	if( defined( $hsp = $hit->next_domain ) ) {
	    is($hsp->hit->start, 1);
	    is($hsp->hit->end, 77);
	    is($hsp->query->start, 33);
	    is($hsp->query->end, 103);
	    is($hsp->score, 71.2);
	    float_is($hsp->evalue, 2.2e-17);
	    is($hsp->query_string, 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP');
	    is($hsp->gaps('query'), 7);
	    is($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG-kelggrklrv');
	    is($hsp->homology_string, 'lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf+F+++  ++  + A +    +++++gr+++ ');
	    is(	length($hsp->homology_string), length($hsp->hit_string));
	    is( length($hsp->query_string), length($hsp->homology_string));
	}
    }
    if( defined ($hit = $result->next_model) ) {
	if( defined($hsp = $hit->next_domain) ) {
	    is($hsp->hit->start, 1);
	    is($hsp->hit->end, 77);
	    is($hsp->query->start, 124);
	    is($hsp->query->end, 194);
	    is($hsp->score, 75.5);
	    float_is($hsp->evalue, 1.1e-18);
	    is($hsp->query_string, 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV');
	    is($hsp->gaps('query'), 6);
	    is($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv');	 
	    is($hsp->homology_string, 'lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +GfaFVeF++++ ++k +     ++l+g+ + v');
	    is(	length($hsp->homology_string), length($hsp->hit_string));
	    is( length($hsp->query_string), length($hsp->homology_string));
	}
	last;
    }
}
$searchio = Bio::SearchIO->new(-format => 'hmmer',
			      -file   => test_input_file('hmmsearch.out'));
while( my $result = $searchio->next_result ) {
    is(ref($result),'Bio::Search::Result::HMMERResult');
    is($result->algorithm, 'HMMSEARCH');
    is($result->algorithm_version, '2.0');
    is($result->hmm_name, 'HMM [SEED]');
    is($result->sequence_file, 'HMM.dbtemp.29591');
    is($result->database_name, 'HMM.dbtemp.29591');
    is($result->query_name, 'SEED');
    is($result->query_description, '');
    is($result->num_hits(), 1215);
    my $hit = $result->next_model;
    is($hit->name, 'Q91581');
    is($hit->description,'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN');
    float_is($hit->significance, 2e-31);
    is($hit->raw_score, 119.7);
    my $hsp = $hit->next_domain;
    is($hsp->score,119.7);
    float_is($hsp->evalue, 2e-31);
    is($hsp->query->start, 18);
    is($hsp->query->end, 89);
    is($hsp->hit->start, 1);
    is($hsp->hit->end, 77);
    is($hsp->query->seq_id(), 'SEED');
    is($hsp->hit->seq_id(), 'Q91581');   
}

$searchio = Bio::SearchIO->new(-format => 'hmmer',
			      -file   => test_input_file('L77119.hmmer'));

while( my $result = $searchio->next_result ) {
    is(ref($result),'Bio::Search::Result::HMMERResult');
    is($result->algorithm, 'HMMPFAM');
    is($result->algorithm_version, '2.2g');
    is($result->hmm_name, 'Pfam');
    is($result->sequence_file, 'L77119.faa');
    is($result->query_name, 'gi|1522636|gb|AAC37060.1|');
    is($result->query_description, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]');
    is($result->num_hits(), 1);
    my $hit = $result->next_hit;
    is($hit->name, 'Methylase_M');
    is($hit->description,'Type I restriction modification system, M');
    float_is($hit->significance, 0.0022);
    is($hit->raw_score, -105.2);
    my $hsp = $hit->next_hsp;
    is($hsp->score,-105.2);
    float_is($hsp->evalue, 0.0022);
    is($hsp->query->start, 280);
    is($hsp->query->end, 481);
    is($hsp->hit->start, 1);
    is($hsp->hit->end, 279);
    is($hsp->query->seq_id(), 'gi|1522636|gb|AAC37060.1|');
    is($hsp->hit->seq_id(), 'Methylase_M');
    is($hsp->hit_string, 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG-dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn');
    is($hsp->query_string, 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST');
    is($hsp->homology_string, ' ++EL+++  av+   R              L+F K++ dk      +i+         p +   + +++y   ++   ++ ++y ++      + lF++++   e ++  ++++ + +    ++      + +       Glf ++++  ++ +s+   +ne ++e+i+ +++ +++     G++ +el   D++G +YE L+   Ae   K+ G +YTP e++  ia+ + i+  ++                  +++ ++    k+n+i +    s+');
    is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '280 288 289 293-295 300 304 311 313-315 317 324-326 332 335 337 344-346 348 355 358-361 364-366 372 379 383-385 389 396 400 404-408 412 416 417 422 426 429-431 434-436 439 441 446 450 451 455 459 460 463 464 468 471 472 478');
    is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '1 9 10 14-16 18-31 35 39 42-47 51-59 61 63-65 67 72-74 77-79 82 86 89-94 96 103-105 107 110 111 116 118 120-123 126-131 133 135-141 145 150 151 154 158-160 164 171 175 179-183 187 191-193 198 202 205-207 210-212 215 217 222 226 227 231 233 236 237 240-257 261 264-267 273 275-278');
	is(join(' ', $hsp->seq_inds('query', 'gap',1)), '296 306 309 321 328 334 335 350 356 366-368 376 417 456 463 470 479');
	is(join(' ', $hsp->seq_inds('hit', 'gap',1)), '');
}


$searchio = Bio::SearchIO->new(-format => 'hmmer',
			      -file   => test_input_file('cysprot1b.hmmsearch'));


while( my $result = $searchio->next_result ) {
    is(ref($result),'Bio::Search::Result::HMMERResult');
    is($result->algorithm, 'HMMSEARCH');
    is($result->algorithm_version, '2.2g');
    is($result->hmm_name, 'Peptidase_C1.hmm [Peptidase_C1]');
    is($result->database_name, 'cysprot1b.fa');
    is($result->sequence_file, 'cysprot1b.fa');
    is($result->query_name, 'Peptidase_C1');
    is($result->query_accession, 'PF00112');
    is($result->query_description, 'Papain family cysteine protease');
    is($result->num_hits(), 4);
    my $hit = $result->next_hit;
    is($hit->name, 'CATL_RAT');
    is($hit->description,'');
    float_is($hit->significance, 2e-135);
    is($hit->raw_score, 449.4);
    my $hsp = $hit->next_hsp;
    is($hsp->score,449.4);
    float_is($hsp->evalue, 2e-135);
    is($hsp->query->start, 1);
    is($hsp->query->end, 337);
    is($hsp->hit->start, 114);
    is($hsp->hit->end, 332);
    is($hsp->query->seq_id(), 'Peptidase_C1');
    is($hsp->hit->seq_id(), 'CATL_RAT');
    is($hsp->hit_string, 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI');
    is($hsp->homology_string, '+P+++DWRe kg  VtpVK+QG qCGSCWAFSa g lEg+ ++kt      gkl+sLSEQ+LvDC++ d gn+      GCnG Glmd Af+Yik+     NgGl++E++Y     PY+    +kd                   g+Cky+  + ++     a+++g++d+p++     E+al+ka+a++GP+sVa+das+ s    q+Y+sG       +Y+++    C+++   +LdH+Vl+VGYG e+                                      ++++ +YW+VKNSWG++WG++GY++ia+++n    n+CG+a+ asypi');
    is($hsp->query_string, 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi');
    $hit = $result->next_hit;
    is($hit->name, 'CATL_HUMAN');
    is($hit->description,'');
    float_is($hit->significance, 6.1e-134);
    is($hit->raw_score, 444.5);
}

# test for bug 2632 - CS lines should get ignored without breaking the parser
$searchio = Bio::SearchIO->new(-format => 'hmmer', -file => test_input_file('hmmpfam_cs.out'));
my $result = $searchio->next_result;
my $hit = $result->next_hit;
my $hsp = $hit->next_hsp;
is $hsp->seq_str, 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES';