1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
|
#!/usr/bin/env python
# test_infernal.py
from cogent.util.unit_test import TestCase, main
from cogent.parse.infernal import CmsearchParser,CmalignScoreParser
__author__ = "Jeremy Widmann"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Jeremy Widmann"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Jeremy Widmann"
__email__ = "jeremy.widmann@colorado.edu"
__status__ = "Development"
class CmsearchParserTests(TestCase):
"""Tests for CmsearchParser.
"""
def setUp(self):
"""setup for CmsearchParserTests.
"""
self.basic_results_empty = """# command data
# date
# CM summary
# Post search summary
"""
self.basic_results_hits = """# command data
# date
# CM summary
Model_1 Target_1 1 10 1 10 25.25 - 50
Model_1 Target_2 3 13 1 10 14.2 - 49
# Post search summary
"""
self.basic_res = [['Model_1','Target_1', 1, 10, 1, 10, 25.25, '-', 50],\
['Model_1','Target_2', 3, 13, 1, 10, 14.2, '-', 49]]
self.search_res = [['model1.cm','seq_0', 5, 23, 1, 19, 12.85, '-', 37],\
['model1.cm','seq_1', 1, 19, 1, 19, 14.36, '-', 47]]
def test_cmsearch_parser_no_data(self):
"""CmsearchParser should return correct result given no data.
"""
parser = CmsearchParser([])
self.assertEqual(list(parser),[])
def test_cmsearch_parser_no_res(self):
"""CmsearchParser should return correct result given no hits in result.
"""
parser = CmsearchParser(self.basic_results_empty.split('\n'))
self.assertEqual(list(parser),[])
def test_cmsearch_parser_basic(self):
"""CmsearchParser should return correct result given basic output.
"""
parser = CmsearchParser(self.basic_results_hits.split('\n'))
self.assertEqual(list(parser),self.basic_res)
def test_cmsearch_parser_full(self):
"""CmsearchParser should return correct result given cmsearch output.
"""
parser = CmsearchParser(SEARCH_DATA.split('\n'))
self.assertEqual(list(parser),self.search_res)
class CmalignScoreParserTests(TestCase):
"""Tests for CmalignScoreParser.
"""
def setUp(self):
"""setup for CmalignScoreParserTests.
"""
self.basic_results_hits = """# command: data
# date:
#
# cm summary
1 Target_1 83 55.02 2.94 0.956 00:00:00.01
2 Target_2 84 53.31 4.42 0.960 00:00:00.01
# post alignment summary
"""
self.basic_res = [[1,'Target_1',83,55.02,2.94,0.956,'00:00:00.01'],\
[2,'Target_2',84,53.31,4.42,0.960,'00:00:00.01']]
self.search_res = \
[[1,'AABL01002928.1/2363-2445',83,55.02,2.94,0.956,'00:00:00.01'],\
[2,'AACV01025780.1/26051-26134',84,53.31,4.42,0.960,'00:00:00.01']]
def test_cmalign_score_parser_no_data(self):
"""CmalignScoreParser should return correct result given no data.
"""
parser = CmalignScoreParser([])
self.assertEqual(list(parser),[])
def test_cmalign_score_parser_basic(self):
"""CmalignScoreParser should return correct result given basic output.
"""
parser = CmalignScoreParser(self.basic_results_hits.split('\n'))
self.assertEqual(list(parser),self.basic_res)
def test_cmalign_score_parser_full(self):
"""CmalignScoreParser should return correct result given cmalign output.
"""
parser = CmalignScoreParser(ALIGN_DATA.split('\n'))
self.assertEqual(list(parser),self.search_res)
SEARCH_DATA = """# command: cmsearch -T 0.0 --tabfile /tmp/tmpQGr0PGVeaEvGUkw2TM3e.txt --informat FASTA /tmp/tmp40hq0MqFPLn2lAymQeAD.txt /tmp/tmplTEQNgv0UA7sFSV0Z2RL.txt
# date: Mon Nov 8 13:51:12 2010
# num seqs: 3
# dbsize(Mb): 0.000124
#
# Pre-search info for CM 1: model1.cm
#
# rnd mod alg cfg beta bit sc cut
# --- --- --- --- ----- ----------
# 1 hmm fwd loc - 3.00
# 2 cm cyk loc 1e-10 0.00
# 3 cm ins loc 1e-15 0.00
#
# CM: model1.cm
# target coord query coord
# ---------------------- ------------
# model name target name start stop start stop bit sc E-value GC%
# ------------------------------- ----------- ---------- ---------- ----- ----- -------- -------- ---
model1.cm seq_0 5 23 1 19 12.85 - 37
model1.cm seq_1 1 19 1 19 14.36 - 47
#
# Post-search info for CM 1: /tmp/tmpWmLUo5hsKH6nyib4nGMq.cm
#
# rnd mod alg cfg beta bit sc cut num hits surv fract
# --- --- --- --- ----- ---------- -------- ----------
# 1 hmm fwd loc - 3.00 2 0.4113
# 2 cm cyk loc 1e-10 0.00 2 0.4113
# 3 cm ins loc 1e-15 0.00 2 0.3065
#
# run time
# -----------
# 00:00:00"""
ALIGN_DATA = """# cmalign :: align sequences to an RNA CM
# INFERNAL 1.0.2 (October 2009)
# Copyright (C) 2009 HHMI Janelia Farm Research Campus
# Freely distributed under the GNU General Public License (GPLv3)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# command: cmalign orig_alignments/RF01057_3NPQ_chain_A_results.cm RF01057_two_seqs.fasta
# date: Thu May 1 13:00:32 2011
#
# cm name algorithm config sub bands tau
# ------------------------- --------- ------ --- ----- ------
# RF01057_3NPQ_chain_A_resu opt acc global no hmm 1e-07
#
# bit scores
# ------------------
# seq idx seq name len total struct avg prob elapsed
# ------- -------------------------- ----- -------- -------- -------- -----------
1 AABL01002928.1/2363-2445 83 55.02 2.94 0.956 00:00:00.01
2 AACV01025780.1/26051-26134 84 53.31 4.42 0.960 00:00:00.01
# STOCKHOLM 1.0
#=GF AU Infernal 1.0.2
AABL01002928.1/2363-2445 CGCGCCGAGGAGCGCUGCGACGGCCCG...UCGAGGGCCGCCAGGCUCGG
AACV01025780.1/26051-26134 CCUGCCGAGGGGCGCUGCGACCGGAUCcaaUGAGGCCCGGCCAGGCUCGG
#=GC SS_cons :::::::::<--<<<--<--<<_____...________>>->--------
#=GC RF CuuuCCGAGGAGCGCUGcAACgGgcuc...uuacggcccGCcAGGCUCGG
AABL01002928.1/2363-2445 CGGGG...ACAAucgguUUUCCAACGGCGSUCUGUUUAU
AACV01025780.1/26051-26134 UAAGGuggCUUU.....GUAACAACGGCGCCCGGCUAGA
#=GC SS_cons -----...----.....--------->>>-->:::::::
#=GC RF aaagG...uaaa.....ccuaCAACGGCGCUCAcuCaca
//
#
# CPU time: 0.02u 0.00s 00:00:00.02 Elapsed: 00:00:00"""
if __name__ == '__main__':
main()
|