File: test_clustal.py

package info (click to toggle)
python-cogent 1.4.1-1.2
  • links: PTS, VCS
  • area: non-free
  • in suites: squeeze
  • size: 13,260 kB
  • ctags: 20,087
  • sloc: python: 116,163; ansic: 732; makefile: 74; sh: 9
file content (131 lines) | stat: -rw-r--r-- 5,369 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python
"""Unit tests for the clustal parsers.
"""
from cogent.parse.clustal import LabelLineParser, is_clustal_seq_line, \
    last_space, delete_trailing_number, MinimalClustalParser
from cogent.parse.record import RecordError
from cogent.util.unit_test import TestCase, main
from cogent.core.alignment import Alignment

# Module metadata: authorship, licensing and release information.
__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2009, The Cogent Project"
__credits__ = ["Rob Knight", "Sandra Smit"]
__license__ = "GPL"
__version__ = "1.4.1"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Production"

#Note: the data are all strings and hence immutable, so it's OK to define
#them here instead of in setUp and then subclassing everything from that
#base class. If the data were mutable, we'd need to take more precautions
#to avoid crossover between tests.

# Smallest fixture: a single line holding a label and a sequence split by a tab.
minimal = "abc\tucag"

# Two blocks of two sequences each, given as a list of lines. The second
# block lists the labels in the opposite order and separates 'def' from its
# sequence with a space instead of a tab; the final element is an empty line.
two = [
    "abc\tuuu",
    "def\tccc",
    "",
    "    ***",
    "",
    "def ggg",
    "abc\taaa",
    "",
]

# Clustal W style output, one list element per line: a header line, then
# three blocks for the sequences abc/def/xyz with trailing residue counts
# and whitespace-led conservation ('*') lines.
real = [
    "CLUSTAL W (1.82) multiple sequence alignment",
    "",
    "",
    "abc             GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA 60",
    "def             ------------------------------------------------------------",
    "xyz             ------------------------------------------------------------",
    "",
    "",
    "abc             GUCGAUACGUACGUCAGUCAGUACGUCAGCAUGCAUACGUACGUCGUACGUACGU-CGAC 119",
    "def             -----------------------------------------CGCGAUGCAUGCAU-CGAU 18",
    "xyz             -------------------------------------CAUGCAUCGUACGUACGCAUGAC 23",
    # conservation line: 57 leading blanks (16 label columns + 41 residues)
    " " * 57 + "*    * * * *    **",
    "",
    "abc             UGACUAGUCAGCUAGCAUCGAUCAGU 145",
    "def             CGAUCAGUCAGUCGAU---------- 34",
    "xyz             UGCUGCAUCA---------------- 33",
    # conservation line: 16 leading blanks
    " " * 16 + "*     ***",
]

# Lines that contain no whitespace at all.
bad = ["dshfjsdfhdfsj", "hfsdjksdfhjsdf"]

# Lines with more than one whitespace-separated field; the second line has
# a space inside what the tests treat as the label ('def ggg').
space_labels = ["abc uca", "def ggg ccc"]

class clustalTests(TestCase):
    """Tests of top-level functions.

    Covers the helper functions imported from cogent.parse.clustal:
    is_clustal_seq_line, last_space and delete_trailing_number.
    """

    def test_is_clustal_seq_line(self):
        """is_clustal_seq_line should reject blanks and 'CLUSTAL'"""
        ic = is_clustal_seq_line
        # Use the TestCase assertion methods instead of bare ``assert``
        # statements: ``assert`` is compiled out under ``python -O``, which
        # would make this test pass without checking anything.
        self.assertTrue(ic('abc'))
        self.assertTrue(ic('abc  def'))
        # program header lines are not sequence lines
        self.assertFalse(ic('CLUSTAL'))
        self.assertFalse(ic('CLUSTAL W fsdhicjkjsdk'))
        # lines that begin with whitespace are not sequence lines
        self.assertFalse(ic('  *   *'))
        self.assertFalse(ic(' abc def'))
        self.assertFalse(ic('MUSCLE (3.41) multiple sequence alignment'))

    def test_last_space(self):
        """last_space should split on last whitespace"""
        # whitespace runs left of the final split are expected to collapse
        # to single spaces
        self.assertEqual(last_space('a\t\t\t  b    c'), ['a b', 'c'])
        # no whitespace: a single-element list comes back
        self.assertEqual(last_space('xyz'), ['xyz'])
        # leading whitespace is expected to be dropped
        self.assertEqual(last_space('  a b'), ['a','b'])

    def test_delete_trailing_number(self):
        """delete_trailing_number should delete the trailing number if present"""
        dtn = delete_trailing_number
        # inputs without a trailing number must come back unchanged
        self.assertEqual(dtn('abc'), 'abc')
        self.assertEqual(dtn('a b c'), 'a b c')
        self.assertEqual(dtn('a \t  b  \t  c'), 'a \t  b  \t  c')
        # the number and the whitespace before it are both removed
        self.assertEqual(dtn('a b 3'), 'a b')
        self.assertEqual(dtn('a b c \t 345'), 'a b c')

class MinimalClustalParserTests(TestCase):
    """Checks MinimalClustalParser against the module-level fixtures.

    Each test compares the parser's return value, a pair of
    (label -> list of sequence chunks, list of labels), with a literal
    expectation.
    """

    def test_null(self):
        """MinimalClustalParser should give ({}, []) when there are no lines"""
        self.assertEqual(MinimalClustalParser([]), ({}, []))

    def test_minimal(self):
        """MinimalClustalParser should parse a lone label/sequence line"""
        # the parser takes a sequence of lines, hence the one-element list
        parsed = MinimalClustalParser([minimal])
        expected = ({'abc': ['ucag']}, ['abc'])
        self.assertEqual(parsed, expected)

    def test_two(self):
        """MinimalClustalParser should parse two sequences over two blocks"""
        expected_seqs = {'abc': ['uuu', 'aaa'], 'def': ['ccc', 'ggg']}
        expected_labels = ['abc', 'def']
        parsed = MinimalClustalParser(two)
        self.assertEqual(parsed, (expected_seqs, expected_labels))

    def test_real(self):
        """MinimalClustalParser should parse genuine Clustal output"""
        expected_seqs = {
            'abc': [
                'GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA',
                'GUCGAUACGUACGUCAGUCAGUACGUCAGCAUGCAUACGUACGUCGUACGUACGU-CGAC',
                'UGACUAGUCAGCUAGCAUCGAUCAGU',
            ],
            'def': [
                '------------------------------------------------------------',
                '-----------------------------------------CGCGAUGCAUGCAU-CGAU',
                'CGAUCAGUCAGUCGAU----------',
            ],
            'xyz': [
                '------------------------------------------------------------',
                '-------------------------------------CAUGCAUCGUACGUACGCAUGAC',
                'UGCUGCAUCA----------------',
            ],
        }
        seqs, order = MinimalClustalParser(real)
        self.assertEqual(order, ['abc', 'def', 'xyz'])
        self.assertEqual(seqs, expected_seqs)

    def test_bad(self):
        """MinimalClustalParser should raise on bad data unless strict is off"""
        # with strict processing disabled the bad lines yield an empty result
        lenient = MinimalClustalParser(bad, strict=False)
        self.assertEqual(lenient, ({}, []))
        # with default arguments the same input must raise RecordError
        self.assertRaises(RecordError, MinimalClustalParser, bad)

    def test_space_labels(self):
        """MinimalClustalParser should accept labels that contain spaces"""
        expected = ({'abc': ['uca'], 'def ggg': ['ccc']}, ['abc', 'def ggg'])
        self.assertEqual(MinimalClustalParser(space_labels), expected)

# Run the tests through the imported main() only when executed as a script.
if __name__ == '__main__':
    main()