1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
|
#!/usr/bin/env python
"""Unit tests for GFF and related parsers.
"""
from cogent.parse.gff import *
from cogent.util.unit_test import TestCase, main
from StringIO import StringIO
__author__ = "Matthew Wakefield"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Matthew Wakefield"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Matthew Wakefield"
__email__ = "wakefield@wehi.edu.au"
__status__ = "Production"
# Header variants that testGffParserHeaders prepends to the data lines:
# a multi-line "##" pragma header, a version-only header, and no header.
headers = [
    (
        "##gff-version 2\n"
        "##source-version <source> <version text>\n"
        "##date <date>\n"
        "##Type <type> [<seqname>]\n"
        "##DNA <seqname>\n"
        "##acggctcggattggcgctggatgatagatcagacgac\n"
        "##...\n"
        "##end-DNA\n"
    ),
    "##gff-version 2\n",
    "",
]
# Column layout of a GFF data line:
# '<seqname>\t<source>\t<feature>\t<start>\t<end>\t<score>\t<strand>\t<frame>\t[attribute]\n'
#
# Each entry pairs a raw GFF line with the 10-tuple the tests expect
# GffParser to yield for it.  As the fixtures show, the expected start on
# the '+' strand is the input start minus one, and for '-' strand records
# the larger coordinate is listed first.  The final element is None in every
# fixture (presumably the trailing-comment slot -- confirm against GffParser).
data_lines = [
    # Forward-strand feature with a multi-part attribute field.
    (
        'seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget "HBA_HUMAN" 11 55 ; E_value 0.0003\n',
        ('seq1', 'BLASTX', 'similarity', 100, 235, '87.1', '+', '0',
         'Target "HBA_HUMAN" 11 55 ; E_value 0.0003', None),
    ),
    # Reverse-strand feature with a single attribute.
    (
        'dJ102G20\tGD_mRNA\tcoding_exon\t7105\t7201\t.\t-\t2\tSequence "dJ102G20.C1.1"\n',
        ('dJ102G20', 'GD_mRNA', 'coding_exon', 7201, 7104, '.', '-', '2',
         'Sequence "dJ102G20.C1.1"', None),
    ),
    # Same record with an empty attribute column.
    (
        'dJ102G20\tGD_mRNA\tcoding_exon\t7105\t7201\t.\t-\t2\t\n',
        ('dJ102G20', 'GD_mRNA', 'coding_exon', 7201, 7104, '.', '-', '2',
         '', None),
    ),
    # Spaces in fields, a negative start, and an end beyond 32 bits.  The
    # expected end is written without the Python-2-only "L" suffix: plain
    # integers auto-promote, so it compares equal on Python 2 and is also
    # valid syntax on Python 3.
    (
        '12345\tSource with spaces\tfeature with spaces\t-100\t3600000000\t1e-5\t-\t.\tSequence "BROADO5" ; Note "This is a \\t tab containing \\n multi line comment"\n',
        ('12345', 'Source with spaces', 'feature with spaces', 3600000000, 101,
         '1e-5', '-', '.',
         'Sequence "BROADO5" ; Note "This is a \\t tab containing \\n multi line comment"',
         None),
    ),
]
class GffTest(TestCase):
    """Tests for GffParser and parse_attributes using the module fixtures."""

    def testGffParserData(self):
        """Test GffParser with valid data lines"""
        for line, canned_result in data_lines:
            # Builtin next() rather than the Python-2-only .next() method;
            # only the first record parsed from each single-line input is used.
            result = next(GffParser(StringIO(line)))
            self.assertEqual(result, canned_result)

    def testGffParserHeaders(self):
        """Test GffParser with valid data headers"""
        data = "".join(line for line, _ in data_lines)
        # The expected records do not depend on the header, so build once.
        expected = [canned for _, canned in data_lines]
        for header in headers:
            # Header lines must not contribute any records of their own.
            result = list(GffParser(StringIO(header + data)))
            self.assertEqual(result, expected)

    def test_parse_attributes(self):
        """Test parse_attributes"""
        # Index 8 of each expected tuple is the raw attribute string.
        attributes = [canned[8] for _, canned in data_lines]
        self.assertEqual(
            [parse_attributes(field) for field in attributes],
            ['HBA_HUMAN', 'dJ102G20.C1.1', '', 'BROADO5'])
# Run the tests through main() (imported from cogent.util.unit_test) when
# this file is executed as a script.
if __name__ == '__main__':
    main()
|