# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Tests for SearchIO hmmer3-text indexing."""

import unittest

from search_tests_common import CheckRaw, CheckIndex


class Hmmer3TextRawCases(CheckRaw):
    """Raw string retrieval checks for hmmer3-text files.

    Every test hands a test file path, a query ID and the expected raw text
    for that query to ``check_raw``. Some expected strings contain lines
    ending in whitespace (hence the W291 ``noqa`` markers), so the literals
    below must not be re-indented or stripped.
    """

    fmt = 'hmmer3-text'

    def test_hmmer3text_30_multiple_first(self):
        """Retrieve hmmer3-text raw string, HMMER 3.0, first of multiple queries (text_30_hmmscan_001.out)."""
        expected = """# hmmscan :: search sequence(s) against a profile database
# HMMER 3.0 (March 2010); http://hmmer.org/
# Copyright (C) 2010 Howard Hughes Medical Institute.
# Freely distributed under the GNU General Public License (GPLv3).
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:             mult.fasta
# target HMM database:             /home/bow/db/hmmer/Pfam-A.hmm
# output directed to file:         hmmer_cases/text_hmmscan_mult.out
# per-seq hits tabular output:     hmmer_cases/tab_hmmscan_mult.out
# per-dom hits tabular output:     hmmer_cases/domtab_hmmscan_mult.out
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       random_s00  [L=32]
Scores for complete sequence (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Model    Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]


Domain annotation for each model (and alignments):

   [No targets detected that satisfy reporting thresholds]


Internal pipeline statistics summary:
-------------------------------------
Query sequence(s):                         1  (32 residues)
Target model(s):                       13672  (2396357 nodes)
Passed MSV filter:                       338  (0.0247221); expected 273.4 (0.02)
Passed bias filter:                       87  (0.00636337); expected 273.4 (0.02)
Passed Vit filter:                        23  (0.00168227); expected 13.7 (0.001)
Passed Fwd filter:                        14  (0.00102399); expected 0.1 (1e-05)
Initial search space (Z):              13672  [actual number of targets]
Domain search space  (domZ):               0  [number of targets reported over threshold]
# CPU time: 0.20u 0.12s 00:00:00.32 Elapsed: 00:00:00.19
# Mc/sec: 403.60
//
"""
        self.check_raw('Hmmer/text_30_hmmscan_001.out', "random_s00", expected)

    def test_hmmer3text_30_multiple_middle(self):
        """Retrieve hmmer3-text raw string, HMMER 3.0, middle of multiple queries (text_30_hmmscan_001.out)."""
        expected = """# hmmscan :: search sequence(s) against a profile database
# HMMER 3.0 (March 2010); http://hmmer.org/
# Copyright (C) 2010 Howard Hughes Medical Institute.
# Freely distributed under the GNU General Public License (GPLv3).
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:             mult.fasta
# target HMM database:             /home/bow/db/hmmer/Pfam-A.hmm
# output directed to file:         hmmer_cases/text_hmmscan_mult.out
# per-seq hits tabular output:     hmmer_cases/tab_hmmscan_mult.out
# per-dom hits tabular output:     hmmer_cases/domtab_hmmscan_mult.out
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       gi|4885477|ref|NP_005359.1|  [L=154]
Description: myoglobin [Homo sapiens]
Scores for complete sequence (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Model    Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------
      6e-21   74.6   0.3    9.2e-21   74.0   0.2    1.3  1  Globin   Globin


Domain annotation for each model (and alignments):
>> Globin  Globin
   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
 ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
   1 !   74.0   0.2   6.7e-25   9.2e-21       1     107 [.       7     112 ..       7     113 .. 0.97

  Alignments for each domain:
  == domain 1    score: 74.0 bits;  conditional E-value: 6.7e-25
                                  HHHHHHHHHHHHCHHHHHHHHHHHHHHHHHSGGGGGGGCCCTTTT.HHHHHTSCHHHHHHHHHHHHHHHHHHCTTSHHHHHH CS
                       Globin   1 qkalvkaswekvkanaeeigaeilkrlfkaypdtkklFkkfgdls.aedlksspkfkahakkvlaaldeavknldnddnlka 81 
                                  +++lv   w+kv+a+++ +g+e+l rlfk +p+t ++F kf+ l+  +++k s+++k+h+++vl al+ ++k+   ++ ++a
  gi|4885477|ref|NP_005359.1|   7 EWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLKsEDEMKASEDLKKHGATVLTALGGILKK---KGHHEA 85 
                                  5789*********************************************************************...6899** PP

                                  HHHHHHHHHHTT-.--HHHHCCHHHHH CS
                       Globin  82 alkklgarHakrg.vdpanfklfgeal 107
                                  ++k l+++Ha+++ ++ ++ + ++e++
  gi|4885477|ref|NP_005359.1|  86 EIKPLAQSHATKHkIPVKYLEFISECI 112
                                  *********************999998 PP



Internal pipeline statistics summary:
-------------------------------------
Query sequence(s):                         1  (154 residues)
Target model(s):                       13672  (2396357 nodes)
Passed MSV filter:                       458  (0.0334991); expected 273.4 (0.02)
Passed bias filter:                      404  (0.0295494); expected 273.4 (0.02)
Passed Vit filter:                        31  (0.00226741); expected 13.7 (0.001)
Passed Fwd filter:                         1  (7.31422e-05); expected 0.1 (1e-05)
Initial search space (Z):              13672  [actual number of targets]
Domain search space  (domZ):               1  [number of targets reported over threshold]
# CPU time: 0.33u 0.16s 00:00:00.49 Elapsed: 00:00:00.21
# Mc/sec: 1757.33
//
"""  # noqa for pep8 W291 trailing whitespace
        self.check_raw('Hmmer/text_30_hmmscan_001.out', "gi|4885477|ref|NP_005359.1|", expected)

    def test_hmmer3text_30_multiple_last(self):
        """Retrieve hmmer3-text raw string, HMMER 3.0, last of multiple queries (text_30_hmmscan_001.out)."""
        expected = """# hmmscan :: search sequence(s) against a profile database
# HMMER 3.0 (March 2010); http://hmmer.org/
# Copyright (C) 2010 Howard Hughes Medical Institute.
# Freely distributed under the GNU General Public License (GPLv3).
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:             mult.fasta
# target HMM database:             /home/bow/db/hmmer/Pfam-A.hmm
# output directed to file:         hmmer_cases/text_hmmscan_mult.out
# per-seq hits tabular output:     hmmer_cases/tab_hmmscan_mult.out
# per-dom hits tabular output:     hmmer_cases/domtab_hmmscan_mult.out
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       gi|125490392|ref|NP_038661.2|  [L=352]
Description: POU domain, class 5, transcription factor 1 isoform 1 [Mus musculus]
Scores for complete sequence (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Model       Description
    ------- ------ -----    ------- ------ -----   ---- --  --------    -----------
      7e-37  124.8   0.5    1.4e-36  123.9   0.3    1.5  1  Pou         Pou domain - N-terminal to homeobox domain
    2.1e-18   65.5   1.1    4.1e-18   64.6   0.7    1.5  1  Homeobox    Homeobox domain
  ------ inclusion threshold ------
      0.012   15.6   0.0       0.16   12.0   0.0    2.2  2  HTH_31      Helix-turn-helix domain
      0.039   13.5   0.0      0.095   12.3   0.0    1.6  1  Homeobox_KN Homeobox KN domain
       0.14   10.5   0.1       0.26    9.6   0.1    1.4  1  DUF521      Protein of unknown function (DUF521)


Domain annotation for each model (and alignments):
>> Pou  Pou domain - N-terminal to homeobox domain
   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
 ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
   1 !  123.9   0.3     5e-40   1.4e-36       3      75 .]     133     205 ..     131     205 .. 0.97

  Alignments for each domain:
  == domain 1    score: 123.9 bits;  conditional E-value: 5e-40
                            Pou   3 eldleeleefakefkqrrikLgltqadvgsalgalyGkefsqttIcrFEalqLslknmckLkpllekWLeeae 75 
                                    ++ ++ele+fak +kq+ri+Lg+tqadvg +lg+l+Gk+fsqttIcrFEalqLslknmckL+pllekW+eea+
  gi|125490392|ref|NP_038661.2| 133 KALQKELEQFAKLLKQKRITLGYTQADVGLTLGVLFGKVFSQTTICRFEALQLSLKNMCKLRPLLEKWVEEAD 205
                                    67899******************************************************************96 PP

>> Homeobox  Homeobox domain
   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
 ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
   1 !   64.6   0.7   1.5e-21   4.1e-18       1      57 []     224     280 ..     224     280 .. 0.98

  Alignments for each domain:
  == domain 1    score: 64.6 bits;  conditional E-value: 1.5e-21
                                    SS--SS--HHHHHHHHHHCCTSSS--HHHHHHHHHH----HHHHHHHHHHHHHHHHH CS
                       Homeobox   1 rrkRttftkeqleeLeelFeknrypsaeereeLAkklgLterqVkvWFqNrRakekk 57 
                                    +rkRt++++     Le +F k+++ps ++++++A++lgL++++V+vWF+NrR+k k+
  gi|125490392|ref|NP_038661.2| 224 KRKRTSIENRVRWSLETMFLKCPKPSLQQITHIANQLGLEKDVVRVWFCNRRQKGKR 280
                                    79****************************************************997 PP

>> HTH_31  Helix-turn-helix domain
   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
 ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
   1 ?   12.0   0.0   5.7e-05      0.16       1      35 [.     141     181 ..     141     184 .. 0.96
   2 ?    0.8   0.0      0.19   5.2e+02      39      62 ..     245     268 ..     243     270 .. 0.86

  Alignments for each domain:
  == domain 1    score: 12.0 bits;  conditional E-value: 5.7e-05
                         HTH_31   1 aLGarLralReraGLtqeevAerlg......vSastlsrlE 35 
                                    +++ +L++ R + G tq++v+  lg      +S++t++r E
  gi|125490392|ref|NP_038661.2| 141 QFAKLLKQKRITLGYTQADVGLTLGvlfgkvFSQTTICRFE 181
                                    6999***********************************99 PP

  == domain 2    score: 0.8 bits;  conditional E-value: 0.19
                         HTH_31  39 rgrpsaavlaalaralgldpaera 62 
                                    ++ ps+++++ +a+ lgl+ + ++
  gi|125490392|ref|NP_038661.2| 245 CPKPSLQQITHIANQLGLEKDVVR 268
                                    678**************9988765 PP

>> Homeobox_KN  Homeobox KN domain
   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
 ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
   1 ?   12.3   0.0   3.5e-05     0.095       7      39 ..     244     276 ..     241     277 .. 0.91

  Alignments for each domain:
  == domain 1    score: 12.3 bits;  conditional E-value: 3.5e-05
                    Homeobox_KN   7 hnPYPskevkeelakqTglsrkqidnWFiNaRr 39 
                                    + P Ps +++  +a+q gl  + +  WF N R 
  gi|125490392|ref|NP_038661.2| 244 KCPKPSLQQITHIANQLGLEKDVVRVWFCNRRQ 276
                                    56779*************************996 PP

>> DUF521  Protein of unknown function (DUF521)
   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
 ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
   1 ?    9.6   0.1   9.4e-05      0.26     273     334 ..     221     280 ..     197     294 .. 0.77

  Alignments for each domain:
  == domain 1    score: 9.6 bits;  conditional E-value: 9.4e-05
                         DUF521 273 adlaavleelnkakkeevdlvvlGcPhlsleeleelaellkgrkkkvsvelvvttsravlsk 334
                                    + +++ + +++++   +++ ++l cP  sl++++++a++l  +k    v+++ +  r+  ++
  gi|125490392|ref|NP_038661.2| 221 QARKRKRTSIENRVRWSLETMFLKCPKPSLQQITHIANQLGLEK--DVVRVWFCNRRQKGKR 280
                                    345666667778888899************************99..9999999988876554 PP



Internal pipeline statistics summary:
-------------------------------------
Query sequence(s):                         1  (352 residues)
Target model(s):                       13672  (2396357 nodes)
Passed MSV filter:                       603  (0.0441047); expected 273.4 (0.02)
Passed bias filter:                      465  (0.0340111); expected 273.4 (0.02)
Passed Vit filter:                        44  (0.00321826); expected 13.7 (0.001)
Passed Fwd filter:                         5  (0.000365711); expected 0.1 (1e-05)
Initial search space (Z):              13672  [actual number of targets]
Domain search space  (domZ):               5  [number of targets reported over threshold]
# CPU time: 0.51u 0.15s 00:00:00.66 Elapsed: 00:00:00.23
# Mc/sec: 3667.47
//
"""  # noqa for pep8 W291 trailing whitespace
        self.check_raw('Hmmer/text_30_hmmscan_001.out', "gi|125490392|ref|NP_038661.2|", expected)

    def test_hmmer3text_30_single(self):
        """Retrieve hmmer3-text raw string, HMMER 3.0, single query (text_30_hmmscan_003.out)."""
        expected = """# hmmscan :: search sequence(s) against a profile database
# HMMER 3.0 (March 2010); http://hmmer.org/
# Copyright (C) 2010 Howard Hughes Medical Institute.
# Freely distributed under the GNU General Public License (GPLv3).
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:             s01.fasta
# target HMM database:             /home/bow/db/hmmer/Pfam-A.hmm
# output directed to file:         hmmer_cases/text_hmmscan_s01.out
# per-seq hits tabular output:     hmmer_cases/tab_hmmscan_s01.out
# per-dom hits tabular output:     hmmer_cases/domtab_hmmscan_s01.out
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       gi|4885477|ref|NP_005359.1|  [L=154]
Description: myoglobin [Homo sapiens]
Scores for complete sequence (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Model    Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------
      6e-21   74.6   0.3    9.2e-21   74.0   0.2    1.3  1  Globin   Globin


Domain annotation for each model (and alignments):
>> Globin  Globin
   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
 ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
   1 !   74.0   0.2   6.7e-25   9.2e-21       1     107 [.       7     112 ..       7     113 .. 0.97

  Alignments for each domain:
  == domain 1    score: 74.0 bits;  conditional E-value: 6.7e-25
                                  HHHHHHHHHHHHCHHHHHHHHHHHHHHHHHSGGGGGGGCCCTTTT.HHHHHTSCHHHHHHHHHHHHHHHHHHCTTSHHHHHH CS
                       Globin   1 qkalvkaswekvkanaeeigaeilkrlfkaypdtkklFkkfgdls.aedlksspkfkahakkvlaaldeavknldnddnlka 81 
                                  +++lv   w+kv+a+++ +g+e+l rlfk +p+t ++F kf+ l+  +++k s+++k+h+++vl al+ ++k+   ++ ++a
  gi|4885477|ref|NP_005359.1|   7 EWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLKsEDEMKASEDLKKHGATVLTALGGILKK---KGHHEA 85 
                                  5789*********************************************************************...6899** PP

                                  HHHHHHHHHHTT-.--HHHHCCHHHHH CS
                       Globin  82 alkklgarHakrg.vdpanfklfgeal 107
                                  ++k l+++Ha+++ ++ ++ + ++e++
  gi|4885477|ref|NP_005359.1|  86 EIKPLAQSHATKHkIPVKYLEFISECI 112
                                  *********************999998 PP



Internal pipeline statistics summary:
-------------------------------------
Query sequence(s):                         1  (154 residues)
Target model(s):                       13672  (2396357 nodes)
Passed MSV filter:                       458  (0.0334991); expected 273.4 (0.02)
Passed bias filter:                      404  (0.0295494); expected 273.4 (0.02)
Passed Vit filter:                        31  (0.00226741); expected 13.7 (0.001)
Passed Fwd filter:                         1  (7.31422e-05); expected 0.1 (1e-05)
Initial search space (Z):              13672  [actual number of targets]
Domain search space  (domZ):               1  [number of targets reported over threshold]
# CPU time: 0.28u 0.17s 00:00:00.45 Elapsed: 00:00:00.21
# Mc/sec: 1757.33
//
"""  # noqa for pep8 W291 trailing whitespace
        self.check_raw('Hmmer/text_30_hmmscan_003.out', "gi|4885477|ref|NP_005359.1|", expected)


class Hmmer3TextIndexCases(CheckIndex):
    """Indexing checks for hmmer3-text files.

    Each test passes one test file path together with the class-level
    format name to ``check_index``.
    """

    fmt = 'hmmer3-text'

    def test_hmmertext_text_30_hmmscan_001(self):
        """Index hmmer3-text, HMMER 3.0 hmmscan, multiple queries."""
        self.check_index('Hmmer/text_30_hmmscan_001.out', self.fmt)

    def test_hmmertext_text_30_hmmscan_002(self):
        """Index hmmer3-text, HMMER 3.0 hmmscan, single query, no hits."""
        self.check_index('Hmmer/text_30_hmmscan_002.out', self.fmt)

    def test_hmmertext_text_30_hmmscan_006(self):
        """Index hmmer3-text, HMMER 3.0 hmmscan, single query, multiple hits."""
        self.check_index('Hmmer/text_30_hmmscan_006.out', self.fmt)

    def test_hmmertext_text_30_hmmscan_007(self):
        """Index hmmer3-text, HMMER 3.0 hmmscan, single query, no alignments."""
        self.check_index('Hmmer/text_30_hmmscan_007.out', self.fmt)

    def test_hmmertext_text_30_hmmscan_008(self):
        """Index hmmer3-text, HMMER 3.0 hmmscan, single query, no alignment width."""
        self.check_index('Hmmer/text_30_hmmscan_008.out', self.fmt)

    def test_hmmertext_text_30_hmmsearch_005(self):
        """Index hmmer3-text, HMMER 3.0 hmmsearch, multiple queries."""
        self.check_index('Hmmer/text_30_hmmsearch_005.out', self.fmt)


if __name__ == "__main__":
    # Run this module's tests directly, using a verbose text runner.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
