File: test_column.py

package info (click to toggle)
python-cogent 1.5.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 16,424 kB
  • ctags: 24,343
  • sloc: python: 134,200; makefile: 100; ansic: 17; sh: 10
file content (141 lines) | stat: -rw-r--r-- 6,121 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python

from cogent.util.unit_test  import TestCase, main
from cogent.core.info       import Info
from cogent.parse.foldalign import find_struct
from cogent.parse.pfold     import tree_struct_sep
from cogent.parse.column    import column_parser

# Module metadata: authorship, copyright, licence and release information.
__author__ = "Shandy Wikman"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__contributors__ = ["Shandy Wikman"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Shandy Wikman"
__email__ = "ens01svn@cs.umu.se"
__status__ = "Development"

class ColumnParserTest(TestCase):
    """Exercises the column-format RNA secondary structure parsers.

    The module-level PFOLD and FOLDALIGN fixtures are used as parser input
    and the results are compared with hand-written expectations.
    """

    def setUp(self):
        """Attach raw program output and the expected parse results."""
        # All three expectations contain the same four base pairs; each one
        # receives its own copy so that no list object is shared.
        base_pairs = [(0, 13), (1, 12), (2, 11), (6, 10)]
        column_sequence = 'GCAGAUUUAGAUGC'

        # raw program output fed to the parsers
        self.pfold_out = PFOLD
        self.foldalign_out = FOLDALIGN

        # expected parser results
        self.pfold_exp = [[column_sequence, list(base_pairs)]]
        self.foldalign_exp = [[column_sequence, list(base_pairs)]]
        self.find_struct_exp = [
            [list(base_pairs), 'GCCACGUAGCUCAG', 'GCCGUAUGUUUCAG'],
        ]

    def test_pfold_output(self):
        """column_parser should parse the structure part of pfold output"""
        # tree_struct_sep returns the tree section and the remaining lines;
        # the tree section is checked before the remainder is parsed.
        tree_section, structure_lines = tree_struct_sep(self.pfold_out)
        self.assertEqual(tree_section, PFOLD_tree)

        observed = column_parser(structure_lines)
        self.assertEqual(observed, self.pfold_exp)

    def test_foldalign_output(self):
        """column_parser should parse foldalign output"""
        observed = column_parser(self.foldalign_out)
        self.assertEqual(observed, self.foldalign_exp)

    def test_foldalign_find_struct(self):
        """find_struct should return the pairs and both aligned sequences"""
        observed = find_struct(self.foldalign_out)
        self.assertEqual(observed, self.find_struct_exp)


# Sample FOLDALIGN output, one list item per output line.
FOLDALIGN = [
    # program banner and literature reference
    '; FOLDALIGN           2.0.3\n',
    '; REFERENCE           J.H. Havgaard, R.B. Lyngs\xf8, G.D. Stormo, J. Gorodkin\n',
    '; REFERENCE           Pairwise local structural alignment of RNA sequences\n',
    '; REFERENCE           with sequence similarity less than 40%\n',
    '; REFERENCE           Bioinformatics 21(9), 1815-1824, 2005\n',
    # pairwise alignment summary (the part read by find_struct's test)
    '; ALIGNMENT_ID        n.a.\n',
    '; ALIGNING            seq1 against seq2\n',
    '; ALIGN               seq1          \n',
    '; ALIGN               seq2          \n',
    '; ALIGN               Score: 929\n',
    '; ALIGN               Identity: 69 % ( 48 / 70 )\n',
    '; ALIGN               Begin\n',
    '; ALIGN\n',
    '; ALIGN               seq1          GCCACGUAGC UCAG\n',
    '; ALIGN               Structure     (((...(... ))))\n',
    '; ALIGN               seq2          GCCGUAUGUU UCAG\n',
    '; ALIGN \n',
    '; ALIGN               End\n',
    '; ==============================================================================\n',
    # column-format entry header: column declarations and run parameters
    '; TYPE                RNA\n',
    '; COL 1               label\n',
    '; COL 2               residue\n',
    '; COL 3               seqpos\n',
    '; COL 4               alignpos\n',
    '; COL 5               align_bp\n',
    '; COL 6               seqpos_bp\n',
    '; ENTRY               seq1\n',
    '; ALIGNMENT_ID        n.a.\n',
    '; ALIGNMENT_LIST      seq1 seq2\n',
    '; FOLDALIGN_SCORE     929\n',
    '; GROUP               1\n',
    '; FILENAME            seq1.fasta\n',
    '; START_POSITION      2\n',
    '; END_POSITION        71\n',
    '; ALIGNMENT_SIZE      2\n',
    '; ALIGNMENT_LENGTH    70\n',
    '; SEQUENCE_LENGTH     76\n',
    '; PARAMETER           max_length=76\n',
    '; PARAMETER           max_diff=76\n',
    '; PARAMETER           min_loop=3\n',
    '; PARAMETER           score_matrix=<default>\n',
    '; PARAMETER           nobranching=<false>\n',
    '; PARAMETER           global=<false>\n',
    '; ----------\n',
    # one row per residue, columns as declared by the '; COL' lines above
    'N     G     1    1      14      0.90\n',
    'N     C     2    2      13      0.79\n',
    'N     A     3    3      12      0.87\n',
    'N     G     4    4       .      0.60\n',
    'N     A     5    5       .      0.34\n',
    'N     U     6    6       .      0.34\n',
    'N     U     7    7      11      0.98\n',
    'N     U     8    8       .      0.34\n',
    'N     A     9    9       .      0.56\n',
    'N     G    10    10      .      0.67\n',
    'N     A    11    11      7      0.78\n',
    'N     U    12    12      3      0.87\n',
    'N     G    13    13      2      0.87\n',
    'N     C    14    14      1      0.90\n',
    '; **********\n',
]
# Sample pfold column-format output: a TREE entry followed by an RNA entry.
PFOLD = [
    # TREE entry
    '; generated by fasta2col\n',
    '; ============================================================\n',
    '; TYPE              TREE\n',
    '; COL 1             label\n',
    '; COL 2             number\n',
    '; COL 3             name\n',
    '; COL 4             uplen\n',
    '; COL 5             child\n',
    '; COL 6             brother\n',
    '; ENTRY             tree\n',
    '; root              1\n',
    '; ----------\n',
    ' N     1         seq1 0.001000     .     .\n',
    '; **********\n',
    # RNA entry header
    '; TYPE              RNA\n',
    '; COL 1             label\n',
    '; COL 2             residue\n',
    '; COL 3             seqpos\n',
    '; COL 4             alignpos\n',
    '; COL 5             align_bp\n',
    '; COL 6             certainty\n',
    '; ENTRY             seq1\n',
    '; ----------\n',
    # one row per residue, columns as declared by the '; COL' lines above
    'N     G     1    1      14      0.90\n',
    'N     C     2    2      13      0.79\n',
    'N     A     3    3      12      0.87\n',
    'N     G     4    4       .      0.60\n',
    'N     A     5    5       .      0.34\n',
    'N     U     6    6       .      0.34\n',
    'N     U     7    7      11      0.98\n',
    'N     U     8    8       .      0.34\n',
    'N     A     9    9       .      0.56\n',
    'N     G    10    10      .      0.67\n',
    'N     A    11    11      7      0.78\n',
    'N     U    12    12      3      0.87\n',
    'N     G    13    13      2      0.87\n',
    'N     C    14    14      1      0.90\n',
    '; **********\n',
]


# Expected tree section that tree_struct_sep should split off from PFOLD.
PFOLD_tree = [
    '; generated by fasta2col\n',
    '; ============================================================\n',
    '; TYPE              TREE\n',
    '; COL 1             label\n',
    '; COL 2             number\n',
    '; COL 3             name\n',
    '; COL 4             uplen\n',
    '; COL 5             child\n',
    '; COL 6             brother\n',
    '; ENTRY             tree\n',
    '; root              1\n',
    '; ----------\n',
    ' N     1         seq1 0.001000     .     .\n',
    '; **********\n',
]

# Call the imported unit_test main() only when this file is run directly.
if __name__ == "__main__":
    main()