# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Tests for SearchIO hmmer2-text indexing."""

import os
import unittest

from search_tests_common import CheckRaw, CheckIndex


class Hmmer2TextRawCases(CheckRaw):

    fmt = 'hmmer2-text'

    def test_hmmer2text_22_single_hmmsearch(self):
        """Test hmmer2-text raw string retrieval, single query, hmmsearch"""
        filename = os.path.join('Hmmer', 'text_22_hmmsearch_001.out')
        raw = """hmmsearch - search a sequence database with a profile HMM
HMMER 2.2g (August 2001)
Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
Freely distributed under the GNU General Public License (GPL)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
HMM file:                   Peptidase_C1.hmm [Peptidase_C1]
Sequence database:          cysprot1b.fa
per-sequence score cutoff:  [none]
per-domain score cutoff:    [none]
per-sequence Eval cutoff:   <= 10        
per-domain Eval cutoff:     [none]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query HMM:   Peptidase_C1
Accession:   PF00112
Description: Papain family cysteine protease
  [HMM has been calibrated; E-values are empirical estimates]

Scores for complete sequences (score includes all domains):
Sequence   Description                                  Score    E-value  N 
--------   -----------                                  -----    ------- ---
CATL_RAT                                                449.4     2e-135   1
CATL_HUMAN                                              444.5   6.1e-134   1
CATH_RAT                                                381.8   4.8e-115   1
PAPA_CARPA                                              337.7     9e-102   1

Parsed for domains:
Sequence   Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
--------   ------- ----- -----    ----- -----      -----  -------
CATL_RAT     1/1     114   332 ..     1   337 []   449.4   2e-135
CATL_HUMAN   1/1     114   332 ..     1   337 []   444.5 6.1e-134
CATH_RAT     1/1     114   330 ..     1   337 []   381.8 4.8e-115
PAPA_CARPA   1/1     134   343 ..     1   337 []   337.7   9e-102

Alignments of top-scoring domains:
CATL_RAT: domain 1 of 1, from 114 to 332: score 449.4, E = 2e-135
                   *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt
                      +P+++DWRe kg  VtpVK+QG qCGSCWAFSa g lEg+ ++kt  
    CATL_RAT   114    IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT-- 155  

                   kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke
                       gkl+sLSEQ+LvDC++ d gn+      GCnG Glmd Af+Yik+ 
    CATL_RAT   156 ----GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE- 192  

                   qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt
                       NgGl++E++Y     PY+    +kd                   g+
    CATL_RAT   193 ----NGGLDSEESY-----PYE----AKD-------------------GS 210  

                   CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai
                   Cky+  + ++     a+++g++d+p++     E+al+ka+a++GP+sVa+
    CATL_RAT   211 CKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAM 249  

                   dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY
                   das+ s    q+Y+sG       +Y+++    C+++   +LdH+Vl+VGY
    CATL_RAT   250 DASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGY 283  

                   GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW
                   G e+                                      ++++ +YW
    CATL_RAT   284 GYEG-T------------------------------------DSNKDKYW 296  

                   IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-*
                   +VKNSWG++WG++GY++ia+++n    n+CG+a+ asypi   
    CATL_RAT   297 LVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI    332  

CATL_HUMAN: domain 1 of 1, from 114 to 332: score 444.5, E = 6.1e-134
                   *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt
                      +P s+DWRe kg +VtpVK+QG qCGSCWAFSa+galEg+ ++kt  
  CATL_HUMAN   114    APRSVDWRE-KG-YVTPVKNQG-QCGSCWAFSATGALEGQMFRKT-- 155  

                   kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke
                       g l+sLSEQ+LvDC+g + gn+      GCnG Glmd+Af+Y+++ 
  CATL_HUMAN   156 ----GRLISLSEQNLVDCSG-PQGNE------GCNG-GLMDYAFQYVQD- 192  

                   qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt
                       NgGl++E++Y     PY+    +++                    +
  CATL_HUMAN   193 ----NGGLDSEESY-----PYE----ATE-------------------ES 210  

                   CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai
                   Ckyn +k s+     a+++g++d+p +     E+al+ka+a++GP+sVai
  CATL_HUMAN   211 CKYN-PKYSV-----ANDTGFVDIPKQ-----EKALMKAVATVGPISVAI 249  

                   dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY
                   da++ s   F +Yk G       +Y ++   +C+++   + dH+Vl+VGY
  CATL_HUMAN   250 DAGHES---FLFYKEG-------IYFEP---DCSSE---DMDHGVLVVGY 283  

                   GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW
                   G e+                                      e+++ +YW
  CATL_HUMAN   284 GFES-T------------------------------------ESDNNKYW 296  

                   IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-*
                   +VKNSWG++WG+ GY+++a+++     n+CGIas asyp+   
  CATL_HUMAN   297 LVKNSWGEEWGMGGYVKMAKDRR----NHCGIASAASYPT    332  

CATH_RAT: domain 1 of 1, from 114 to 330: score 381.8, E = 4.8e-115
                   *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt
                       P s+DWR+ kg  V+pVK+QG  CGSCW FS++galE++ +i++  
    CATH_RAT   114    YPSSMDWRK-KGNVVSPVKNQG-ACGSCWTFSTTGALESAVAIAS-- 156  

                   kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke
                       gk   L EQqLvDC   +++n+      GC+G Gl+++AfeYi++ 
    CATH_RAT   157 ----GKMMTLAEQQLVDCAQ-NFNNH------GCQG-GLPSQAFEYILY- 193  

                   qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt
                       N+G++ E++Y     PY     gk+                   g+
    CATH_RAT   194 ----NKGIMGEDSY-----PYI----GKN-------------------GQ 211  

                   CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai
                   Ck+n +++++     a++k+ ++++ n    dE+a+ +a+a + Pvs a+
    CATH_RAT   212 CKFN-PEKAV-----AFVKNVVNITLN----DEAAMVEAVALYNPVSFAF 251  

                   dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY
                   +++e    DF++YksG       vY++    +C +tp + ++HAVl+VGY
    CATH_RAT   252 EVTE----DFMMYKSG-------VYSSN---SCHKTP-DKVNHAVLAVGY 286  

                   GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW
                   G +n g                                          YW
    CATH_RAT   287 GEQN-GLL----------------------------------------YW 295  

                   IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-*
                   IVKNSWG++WG nGYf i+Rgkn     +CG+a +asypi   
    CATH_RAT   296 IVKNSWGSNWGNNGYFLIERGKN-----MCGLAACASYPI    330  

PAPA_CARPA: domain 1 of 1, from 134 to 343: score 337.7, E = 9e-102
                   *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt
                      +Pe +DWR+ kg aVtpVK+QG +CGSCWAFSav ++Eg+++i+t  
  PAPA_CARPA   134    IPEYVDWRQ-KG-AVTPVKNQG-SCGSCWAFSAVVTIEGIIKIRT-- 175  

                   kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke
                       g+l  +SEQ+L+DCd+    ++      GCnG G+++ A++ + + 
  PAPA_CARPA   176 ----GNLNEYSEQELLDCDR---RSY------GCNG-GYPWSALQLVAQ- 210  

                   qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt
                         G++    Y     PY+    g++                     
  PAPA_CARPA   211 -----YGIHYRNTY-----PYE----GVQ-------------------RY 227  

                   CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai
                   C+++ +k+ +    +ak +g ++v+++    +E al + +a+ +PvsV  
  PAPA_CARPA   228 CRSR-EKGPY----AAKTDGVRQVQPY----NEGALLYSIAN-QPVSVVL 267  

                   dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY
                   +a +    DFqlY++G       ++++    +Cg+     +dHAV++VGY
  PAPA_CARPA   268 EAAGK---DFQLYRGG-------IFVG----PCGN----KVDHAVAAVGY 299  

                   GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW
                   G                                              +Y 
  PAPA_CARPA   300 G---------------------------------------------PNYI 304  

                   IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-*
                   ++KNSWGt WGEnGY+ri+Rg+++s ++ CG+ ++  yp+   
  PAPA_CARPA   305 LIKNSWGTGWGENGYIRIKRGTGNS-YGVCGLYTSSFYPV    343  


Histogram of all scores:
score    obs    exp  (one = represents 1 sequences)
-----    ---    ---
> 337      4      -|====                                                       


% Statistical details of theoretical EVD fit:
              mu =  -195.8384
          lambda =     0.1423
chi-sq statistic =     0.0000
  P(chi-square)  =          0

Total sequences searched: 4

Whole sequence top hits:
tophits_s report:
     Total hits:           4
     Satisfying E cutoff:  4
     Total memory:         16K

Domain top hits:
tophits_s report:
     Total hits:           4
     Satisfying E cutoff:  4
     Total memory:         20K
"""  # noqa for pep8 W291 trailing whitespace
        self.check_raw(filename, 'Peptidase_C1', raw)

    def test_hmmer2text_22_single_hmmpfam(self):
        """Test hmmer2-text raw string retrieval, single query, hmmpfam"""
        filename = os.path.join('Hmmer', 'text_22_hmmpfam_001.out')
        raw = """hmmpfam - search one or more sequences against HMM database
HMMER 2.2g (August 2001)
Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
Freely distributed under the GNU General Public License (GPL)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
HMM file:                 Pfam
Sequence file:            L77119.faa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query sequence: gi|1522636|gb|AAC37060.1|
Accession:      [none]
Description:    M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]

Scores for sequence family classification (score includes all domains):
Model       Description                                 Score    E-value  N 
--------    -----------                                 -----    ------- ---
Methylase_M Type I restriction modification system, M  -105.2     0.0022   1

Parsed for domains:
Model       Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
--------    ------- ----- -----    ----- -----      -----  -------
Methylase_M   1/1     280   481 ..     1   279 []  -105.2   0.0022

Alignments of top-scoring domains:
Methylase_M: domain 1 of 1, from 280 to 481: score -105.2, E = 0.0022
                   *->lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerri
                       ++EL+++  av+   R              L+F K++ dk      
  gi|1522636   280    NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------ 306  

                   eieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsql
                   +i+         p +   + +++y   ++   ++ ++y ++      + l
  gi|1522636   307 GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPL 338  

                   FwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdl
                   F++++   e ++  ++++ + +    ++      + +       Glf ++
  gi|1522636   339 FYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSN 374  

                   dfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfG
                   ++  ++ +s+   +ne ++e+i+ +++ +++     G++ +el   D++G
  gi|1522636   375 NV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILG 421  

                   DaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDP
                    +YE L+   Ae   K+ G +YTP e++  ia+ + i+  ++        
  gi|1522636   422 YVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE------- 463  

                   AcGSGSLllqaskflgehdgkrnaisyYGQEsn<-*
                             +++ ++    k+n+i +    s+   
  gi|1522636   464 ---------RFKEIIK--NWKINDINF----ST    481  

//
"""  # noqa for pep8 W291 trailing whitespace
        self.check_raw(filename, 'gi|1522636|gb|AAC37060.1|', raw)

    def test_hmmer2text_22_multiple_first_hmmpfam(self):
        """Test hmmer2-text raw string retrieval, multiple queries, hmmpfam"""
        filename = os.path.join('Hmmer', 'text_24_hmmpfam_001.out')
        raw = """hmmpfam - search one or more sequences against HMM database
HMMER 2.4i (December 2006)
Copyright (C) 1992-2006 HHMI Janelia Farm
Freely distributed under the GNU General Public License (GPL)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
HMM file:                 /home/bow/db/hmmer/Pfam_fs
Sequence file:            mult.fasta
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query sequence: random_s00
Accession:      [none]
Description:    [none]

Scores for sequence family classification (score includes all domains):
Model           Description                             Score    E-value  N 
--------        -----------                             -----    ------- ---
	[no hits above thresholds]

Parsed for domains:
Model           Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
--------        ------- ----- -----    ----- -----      -----  -------
	[no hits above thresholds]

Alignments of top-scoring domains:
	[no hits above thresholds]
//
"""  # noqa for pep8 W291 trailing whitespace
        self.check_raw(filename, 'random_s00', raw)

    def test_hmmer2text_22_multiple_middle_hmmpfam(self):
        """Test hmmer2-text raw string retrieval, multiple queries, hmmpfam"""
        filename = os.path.join('Hmmer', 'text_24_hmmpfam_001.out')
        raw = """hmmpfam - search one or more sequences against HMM database
HMMER 2.4i (December 2006)
Copyright (C) 1992-2006 HHMI Janelia Farm
Freely distributed under the GNU General Public License (GPL)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
HMM file:                 /home/bow/db/hmmer/Pfam_fs
Sequence file:            mult.fasta
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query sequence: gi|4885477|ref|NP_005359.1|
Accession:      [none]
Description:    myoglobin [Homo sapiens]

Scores for sequence family classification (score includes all domains):
Model         Description                               Score    E-value  N 
--------      -----------                               -----    ------- ---
Globin        Globin                                    129.8    5.8e-37   1
tRNA-synt_1b  tRNA synthetases class I (W and Y)          1.8        2.2   1
Rotavirus_VP3 Rotavirus VP3 protein                      -1.2        7.9   1
DTHCT         DTHCT (NUC029) region                       1.5        9.8   1

Parsed for domains:
Model         Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
--------      ------- ----- -----    ----- -----      -----  -------
Globin          1/1       7   143 ..     1   148 []   129.8  5.8e-37
DTHCT           1/1     101   106 ..   101   106 .]     1.5      9.8
tRNA-synt_1b    1/1     120   137 ..   320   337 .]     1.8      2.2
Rotavirus_VP3   1/1     134   147 ..     1    15 [.    -1.2      7.9

Alignments of top-scoring domains:
Globin: domain 1 of 1, from 7 to 143: score 129.8, E = 5.8e-37
                   *->dkalvkasWgkvkgtdnreelGaealarlFkayPdtktyFpkfgdls
                      +++lv+  Wgkv++  +++ +G+e+l rlFk +P+t ++F kf+ l+
  gi|4885477     7    EWQLVLNVWGKVEA--DIPGHGQEVLIRLFKGHPETLEKFDKFKHLK 51   

                   sadaikgspkfkaHgkkVlaalgeavkhLgnddddgnlkaalkkLaarHa
                   s d++k s+++k+Hg++Vl alg ++k       +g + a +k La +Ha
  gi|4885477    52 SEDEMKASEDLKKHGATVLTALGGILKK------KGHHEAEIKPLAQSHA 95   

                   erghvdpanFkllgeallIvvLaahlggeveftpevkaAWdkaldvvada
                   ++++++ ++ + ++e+++ +vL+++ +g  +f +++++A++kal  +  +
  gi|4885477    96 TKHKIPVKYLEFISECII-QVLQSKHPG--DFGADAQGAMNKALELFRKD 142  

                   l<-*
                   +   
  gi|4885477   143 M    143  

DTHCT: domain 1 of 1, from 101 to 106: score 1.5, E = 9.8
                   *->pvKYLe<-*
                      pvKYLe   
  gi|4885477   101    PVKYLE    106  

tRNA-synt_1b: domain 1 of 1, from 120 to 137: score 1.8, E = 2.2
                   *->hggelKkaaaeavnalls<-*
                      h+g++  +a+ a+n++l+   
  gi|4885477   120    HPGDFGADAQGAMNKALE    137  

Rotavirus_VP3: domain 1 of 1, from 134 to 147: score -1.2, E = 7.9
                   *->MkVLaLFrrgvalnY<-*
                       k L LFr+++a+nY   
  gi|4885477   134    -KALELFRKDMASNY    147  

//
"""  # noqa for pep8 W291 trailing whitespace
        self.check_raw(filename, 'gi|4885477|ref|NP_005359.1|', raw)

    def test_hmmer2text_22_multiple_last_hmmpfam(self):
        """Test hmmer2-text raw string retrieval, multiple queries, hmmpfam"""
        filename = os.path.join('Hmmer', 'text_24_hmmpfam_001.out')
        raw = """hmmpfam - search one or more sequences against HMM database
HMMER 2.4i (December 2006)
Copyright (C) 1992-2006 HHMI Janelia Farm
Freely distributed under the GNU General Public License (GPL)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
HMM file:                 /home/bow/db/hmmer/Pfam_fs
Sequence file:            mult.fasta
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query sequence: gi|125490392|ref|NP_038661.2|
Accession:      [none]
Description:    POU domain, class 5, transcription factor 1 isoform 1 [Mus musculus]

Scores for sequence family classification (score includes all domains):
Model           Description                             Score    E-value  N 
--------        -----------                             -----    ------- ---
Pou             Pou domain - N-terminal to homeobox d   171.2    2.4e-48   1
Homeobox        Homeobox domain                          86.7    6.5e-23   1
HTH_3           Helix-turn-helix                          7.5       0.33   1
WSC             WSC domain                                2.1        1.5   1
ComC            COMC family                               3.4        2.3   1
CBM_1           Fungal cellulose binding domain           3.3        3.6   1
Peptidase_M29   Thermophilic metalloprotease (M29)       -2.1        4.4   1
DUF1690         Protein of Unknown function (DUF1690)     1.1        4.5   1
DUF137          Protein of unknown function DUF137        1.0        4.7   1
DASH_Duo1       DASH complex subunit Duo1                 2.3        5.7   1
TFIIB           Transcription factor TFIIB repeat         2.6        6.1   1
DUF1392         Protein of unknown function (DUF1392)     0.8        6.7   1

Parsed for domains:
Model           Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
--------        ------- ----- -----    ----- -----      -----  -------
WSC               1/1      59    67 ..    80    87 .]     2.1      1.5
CBM_1             1/1      61    73 ..     1    13 [.     3.3      3.6
Pou               1/1     131   205 ..     1    78 []   171.2  2.4e-48
ComC              1/1     132   145 ..     1    20 [.     3.4      2.3
Peptidase_M29     1/1     132   145 ..     1    14 [.    -2.1      4.4
DASH_Duo1         1/1     133   141 ..     1     9 [.     2.3      5.7
DUF1690           1/1     137   147 ..   151   161 .]     1.1      4.5
HTH_3             1/1     146   166 ..     1    25 [.     7.5     0.33
DUF137            1/1     197   213 ..   164   180 ..     1.0      4.7
Homeobox          1/1     224   280 ..     1    57 []    86.7  6.5e-23
DUF1392           1/1     248   269 ..   127   164 .]     0.8      6.7
TFIIB             1/1     253   268 ..     1    19 [.     2.6      6.1

Alignments of top-scoring domains:
WSC: domain 1 of 1, from 59 to 67: score 2.1, E = 1.5
                   *->p.pseiCGG<-*
                      p+++e CGG   
  gi|1254903    59    PpAYEFCGG    67   

CBM_1: domain 1 of 1, from 61 to 73: score 3.3, E = 3.6
                   *->vygQCGGigysGp<-*
                      +y+ CGG+ y Gp   
  gi|1254903    61    AYEFCGGMAYCGP    73   

Pou: domain 1 of 1, from 131 to 205: score 171.2, E = 2.4e-48
                   *->deatdleeLEkFAkeFKqRRIkLGyTQadVGlALgalygPGvnafSQ
                      d+++ ++eLE+FAk +Kq+RI+LGyTQadVGl+Lg l+g   ++fSQ
  gi|1254903   131    DMKALQKELEQFAKLLKQKRITLGYTQADVGLTLGVLFG---KVFSQ 174  

                   tTICRFEaLqLSfKNmcKLKPlLekWLeeAE<-*
                   tTICRFEaLqLS KNmcKL+PlLekW+eeA+   
  gi|1254903   175 TTICRFEALQLSLKNMCKLRPLLEKWVEEAD    205  

ComC: domain 1 of 1, from 132 to 145: score 3.4, E = 2.3
                   *->MKntvkkkllkkeLeqFkeL<-*
                      MK ++k      eLeqF  L   
  gi|1254903   132    MKALQK------ELEQFAKL    145  

Peptidase_M29: domain 1 of 1, from 132 to 145: score -2.1, E = 4.4
                   *->MdaFkkeLekyAeL<-*
                      M a +keLe +A L   
  gi|1254903   132    MKALQKELEQFAKL    145  

DASH_Duo1: domain 1 of 1, from 133 to 141: score 2.3, E = 5.7
                   *->aaLqkELeq<-*
                      +aLqkELeq   
  gi|1254903   133    KALQKELEQ    141  

DUF1690: domain 1 of 1, from 137 to 147: score 1.1, E = 4.5
                   *->eEvEqFKklvr<-*
                      +E EqF+kl++   
  gi|1254903   137    KELEQFAKLLK    147  

HTH_3: domain 1 of 1, from 146 to 166: score 7.5, E = 0.33
                   *->lkelRkkkelglsqeeLAeklGskv<-*
                      lk++R    lg++q+++   lG  v   
  gi|1254903   146    LKQKRI--TLGYTQADVGLTLG--V    166  

DUF137: domain 1 of 1, from 197 to 213: score 1.0, E = 4.7
                   *->LeeIVEnyDNkKnLkEv<-*
                      Le+ VE+ DN++nL+E+   
  gi|1254903   197    LEKWVEEADNNENLQEI    213  

Homeobox: domain 1 of 1, from 224 to 280: score 86.7, E = 6.5e-23
                   *->rrkRTtftpeQleeLEkeFqknrYPsreeReeLAkkLgLterqVkvW
                      +rkRT++ +  +  LE +F k+++Ps +++ ++A++LgL++++V+vW
  gi|1254903   224    KRKRTSIENRVRWSLETMFLKCPKPSLQQITHIANQLGLEKDVVRVW 270  

                   FQNRRaKwKk<-*
                   F+NRR+K K+   
  gi|1254903   271 FCNRRQKGKR    280  

DUF1392: domain 1 of 1, from 248 to 269: score 0.8, E = 6.7
                   *->PtLsqtttEGlCIFPrssqGnkmpnRfsLvrerDLVrV<-*
                      P+L q+t                +n++ L  e+D VrV   
  gi|1254903   248    PSLQQITH--------------IANQLGL--EKDVVRV    269  

TFIIB: domain 1 of 1, from 253 to 268: score 2.6, E = 6.1
                   *->ikrfadaLeLpeKkikVad<-*
                      i+++a +L+L +    V++   
  gi|1254903   253    ITHIANQLGLEK---DVVR    268  

//
"""  # noqa for pep8 W291 trailing whitespace
        self.check_raw(filename, 'gi|125490392|ref|NP_038661.2|', raw)


class Hmmer2TextIndexCases(CheckIndex):

    fmt = 'hmmer2-text'

    def test_hmmertext_text_21_hmmpfam_001(self):
        """Test hmmer2-text indexing, HMMER 2.1"""
        filename = os.path.join('Hmmer', 'text_21_hmmpfam_001.out')
        self.check_index(filename, self.fmt)

    def test_hmmertext_text_22_hmmpfam_001(self):
        """Test hmmer2-text indexing, HMMER 2.2"""
        filename = os.path.join('Hmmer', 'text_22_hmmpfam_001.out')
        self.check_index(filename, self.fmt)

    def test_hmmertext_text_23_hmmpfam_001(self):
        """Test hmmer2-text indexing, HMMER 2.3"""
        filename = os.path.join('Hmmer', 'text_23_hmmpfam_001.out')
        self.check_index(filename, self.fmt)

    def test_hmmertext_text_24_hmmpfam_001(self):
        """Test hmmer2-text indexing, HMMER 2.4"""
        filename = os.path.join('Hmmer', 'text_24_hmmpfam_001.out')
        self.check_index(filename, self.fmt)

    def test_hmmertext_text_22_hmmsearch_001(self):
        """Test hmmer2-text indexing, HMMER 2.2"""
        filename = os.path.join('Hmmer', 'text_22_hmmsearch_001.out')
        self.check_index(filename, self.fmt)


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)
