File: test_clustal.py

package info (click to toggle)
python-cogent 1.4.1-1.2
links: PTS, VCS
area: non-free
in suites: squeeze
size: 13,260 kB
ctags: 20,087
sloc: python: 116,163; ansic: 732; makefile: 74; sh: 9
file content (131 lines) | stat: -rw-r--r-- 5,369 bytes
#!/usr/bin/env python
"""Unit tests for the clustal parsers.
"""
from cogent.parse.clustal import LabelLineParser, is_clustal_seq_line, \
    last_space, delete_trailing_number, MinimalClustalParser
from cogent.parse.record import RecordError
from cogent.util.unit_test import TestCase, main
from cogent.core.alignment import Alignment

# Module metadata: authorship, licensing and release information.
__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2009, The Cogent Project"
__credits__ = ["Rob Knight", "Sandra Smit"]
__license__ = "GPL"
__version__ = "1.4.1"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Production"

#Note: the fixtures below are shared module-level constants instead of being
#built in setUp and inherited from a common base class. The strings they
#contain are immutable, but `two`, `real`, `bad` and `space_labels` are lists
#(mutable), so tests must treat them as read-only to avoid crossover between
#tests.

#A single label/sequence line separated by a tab; test_minimal wraps it in a
#list because the parser is given a sequence of lines.
minimal = 'abc\tucag'
#Two sequences ('abc', 'def') spread over two blocks, with a '***' line and
#blank lines in between. The second block lists 'def' before 'abc' and uses a
#space rather than a tab as the separator on the 'def' line.
two = 'abc\tuuu\ndef\tccc\n\n    ***\n\ndef ggg\nabc\taaa\n'.split('\n')

#A complete alignment with a 'CLUSTAL W' header line, three sequences
#('abc', 'def', 'xyz') in three blocks, trailing numbers on most sequence
#lines, and '*' marker lines under the last two blocks.
real = """CLUSTAL W (1.82) multiple sequence alignment


abc             GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA 60
def             ------------------------------------------------------------
xyz             ------------------------------------------------------------


abc             GUCGAUACGUACGUCAGUCAGUACGUCAGCAUGCAUACGUACGUCGUACGUACGU-CGAC 119
def             -----------------------------------------CGCGAUGCAUGCAU-CGAU 18
xyz             -------------------------------------CAUGCAUCGUACGUACGCAUGAC 23
                                                         *    * * * *    **

abc             UGACUAGUCAGCUAGCAUCGAUCAGU 145
def             CGAUCAGUCAGUCGAU---------- 34
xyz             UGCUGCAUCA---------------- 33
                *     ***""".split('\n')

#Lines containing no whitespace at all, so there is no label/sequence split;
#test_bad expects RecordError for these unless strict=False is passed.
bad = ['dshfjsdfhdfsj','hfsdjksdfhjsdf']

#Lines with more than one space-separated field; test_space_labels expects
#everything before the last field to be kept as the label (e.g. 'def ggg').
space_labels = ['abc uca','def ggg ccc']

class clustalTests(TestCase):
    """Tests of top-level functions."""

    def test_is_clustal_seq_line(self):
        """is_clustal_seq_line should reject blanks and 'CLUSTAL'"""
        # TestCase assertion methods are used instead of bare ``assert``
        # statements: bare asserts are compiled away under ``python -O``,
        # which would silently turn this test into a no-op. The message
        # identifies which input line misbehaved.
        accepted = ('abc', 'abc  def')
        rejected = (
            'CLUSTAL',
            'CLUSTAL W fsdhicjkjsdk',
            '  *   *',
            ' abc def',
            'MUSCLE (3.41) multiple sequence alignment',
        )
        for line in accepted:
            self.assertTrue(is_clustal_seq_line(line),
                '%r should be accepted as a sequence line' % line)
        for line in rejected:
            self.assertFalse(is_clustal_seq_line(line),
                '%r should be rejected as a sequence line' % line)

    def test_last_space(self):
        """last_space should split on last whitespace"""
        # Runs of mixed whitespace before the last field are expected to be
        # collapsed to a single space in the first item.
        self.assertEqual(last_space('a\t\t\t  b    c'), ['a b', 'c'])
        # No whitespace at all: the input comes back as a single item.
        self.assertEqual(last_space('xyz'), ['xyz'])
        # Leading whitespace is not expected to produce an empty first item.
        self.assertEqual(last_space('  a b'), ['a','b'])

    def test_delete_trailing_number(self):
        """delete_trailing_number should delete the trailing number if present"""
        dtn = delete_trailing_number
        # Inputs without a trailing number must come back unchanged,
        # including their internal whitespace.
        self.assertEqual(dtn('abc'), 'abc')
        self.assertEqual(dtn('a b c'), 'a b c')
        self.assertEqual(dtn('a \t  b  \t  c'), 'a \t  b  \t  c')
        # A trailing number is removed together with the whitespace
        # that separated it from the rest of the line.
        self.assertEqual(dtn('a b 3'), 'a b')
        self.assertEqual(dtn('a b c \t 345'), 'a b c')

class MinimalClustalParserTests(TestCase):
    """Tests of the MinimalClustalParser class"""

    def test_null(self):
        """MinimalClustalParser should give an empty dict and list for no lines"""
        expected = ({}, [])
        self.assertEqual(MinimalClustalParser([]), expected)

    def test_minimal(self):
        """MinimalClustalParser should parse a lone label/sequence line"""
        # The parser takes a sequence of lines, so wrap the single line.
        lines = [minimal]
        expected = ({'abc': ['ucag']}, ['abc'])
        self.assertEqual(MinimalClustalParser(lines), expected)

    def test_two(self):
        """MinimalClustalParser should parse two sequences over two blocks"""
        expected_data = {
            'abc': ['uuu', 'aaa'],
            'def': ['ccc', 'ggg'],
        }
        expected_labels = ['abc', 'def']
        parsed = MinimalClustalParser(two)
        self.assertEqual(parsed, (expected_data, expected_labels))

    def test_real(self):
        """MinimalClustalParser should parse genuine Clustal output"""
        gap_row = '------------------------------------------------------------'
        expected_data = {
            'abc': [
                'GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA',
                'GUCGAUACGUACGUCAGUCAGUACGUCAGCAUGCAUACGUACGUCGUACGUACGU-CGAC',
                'UGACUAGUCAGCUAGCAUCGAUCAGU',
            ],
            'def': [
                gap_row,
                '-----------------------------------------CGCGAUGCAUGCAU-CGAU',
                'CGAUCAGUCAGUCGAU----------',
            ],
            'xyz': [
                gap_row,
                '-------------------------------------CAUGCAUCGUACGUACGCAUGAC',
                'UGCUGCAUCA----------------',
            ],
        }
        seqs, names = MinimalClustalParser(real)
        # Labels are checked first, then the per-label sequence chunks.
        self.assertEqual(names, ['abc', 'def', 'xyz'])
        self.assertEqual(seqs, expected_data)

    def test_bad(self):
        """MinimalClustalParser should raise on bad data when strict"""
        # With strict processing off, bad lines yield an empty result.
        lenient = MinimalClustalParser(bad, strict=False)
        self.assertEqual(lenient, ({}, []))
        # With the default (strict) processing the same input must raise.
        self.assertRaises(RecordError, MinimalClustalParser, bad)

    def test_space_labels(self):
        """MinimalClustalParser should accept labels containing spaces"""
        expected_data = {'abc': ['uca'], 'def ggg': ['ccc']}
        expected_labels = ['abc', 'def ggg']
        parsed = MinimalClustalParser(space_labels)
        self.assertEqual(parsed, (expected_data, expected_labels))

# When executed as a script, hand control to `main`, which this module
# imports from cogent.util.unit_test.
if __name__ == '__main__':
    main()