#! /usr/bin/env python
##############################################################################
## DendroPy Phylogenetic Computing Library.
##
## Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder.
## All rights reserved.
##
## See "LICENSE.rst" for terms and conditions of usage.
##
## If you use this work or any portion thereof in published work,
## please cite it as:
##
## Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
## for phylogenetic computing. Bioinformatics 26: 1569-1571.
##
##############################################################################
"""
Tests for tokenizers classes.
"""
import unittest
from dendropy.dataio import nexusprocessing
from dendropy.utility.textprocessing import StringIO
class NexusTokenizerTestCase(unittest.TestCase):
    """
    Unit tests for NexusTokenizer: whitespace splitting, quoted tokens,
    comment stripping, captured punctuation, and captured comments.
    """

    def check_tokenization(self,
            input_str,
            expected_tokens):
        """
        Tokenize ``input_str`` with a NexusTokenizer and assert that the
        resulting token sequence is exactly ``expected_tokens``.
        """
        src = StringIO(input_str)
        observed = list(nexusprocessing.NexusTokenizer(src=src))
        self.assertEqual(observed, expected_tokens)

    def test_simple_string(self):
        """Unquoted words separated by mixed whitespace tokenize individually."""
        input_str = "the quick brown\t\tfox \n jumps over\t\t\n the lazy dog"
        expected = [
            "the", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"
        ]
        self.check_tokenization(input_str, expected)

    def test_simple_quoted_string(self):
        """Single-quoted spans become one token with internal spaces preserved."""
        input_str = "the quick 'brown fox' jumps over the 'lazy dog'"
        expected = [
            "the", "quick", "brown fox", "jumps", "over", "the", "lazy dog"
        ]
        self.check_tokenization(input_str, expected)

    def test_padded_quoted_string(self):
        """Doubled single quotes inside a quoted token collapse to one quote."""
        input_str = "the quick 'brown fox''s friend' jumps over the 'lazy dog''s colleague'"
        expected = [
            "the", "quick", "brown fox's friend", "jumps", "over", "the", "lazy dog's colleague"
        ]
        self.check_tokenization(input_str, expected)

    def test_runon_quoted_string(self):
        """Adjacent quoted tokens and punctuation are tokenized separately."""
        input_str = "'a','b','c','d','e'"
        expected = [
            "a", ",", "b", ",", "c", ",", "d", ",", "e",
        ]
        self.check_tokenization(input_str, expected)

    # NOTE(review): this method was previously also named "test_comments",
    # which the later definition of the same name silently shadowed, so this
    # test never actually ran. Renamed so both tests are collected.
    def test_comment_stripping(self):
        """Bracketed comments are dropped from the token stream."""
        input_str = "[&R] (foo:1 [a foo object], [start of subgroup](bar:2, c:2)[end of group][][]["
        expected = [
            "(", "foo", ":", "1", ",", "(", "bar", ":", "2", ",", "c", ":", "2", ")"
        ]
        self.check_tokenization(input_str, expected)

    def test_empty(self):
        """An empty source yields no tokens."""
        input_str = ""
        expected = []
        self.check_tokenization(input_str, expected)

    def test_captured_delimiters(self):
        """Newick punctuation is emitted as individual tokens."""
        input_str = "(aaa:1.00, (b:2.18e-1, (ccc:11, d:1e-1) k: 3) u: 7) rrr:0.0;"
        expected = [
            "(",
            "aaa",
            ":",
            "1.00",
            ",",
            "(",
            "b",
            ":",
            "2.18e-1",
            ",",
            "(",
            "ccc",
            ":",
            "11",
            ",",
            "d",
            ":",
            "1e-1",
            ")",
            "k",
            ":",
            "3",
            ")",
            "u",
            ":",
            "7",
            ")",
            "rrr",
            ":",
            "0.0",
            ";"
        ]
        self.check_tokenization(input_str, expected)

    def test_comments(self):
        """
        Comments adjacent to a token are captured and retrievable via
        ``pull_captured_comments()`` when that token is read.
        """
        input_str = "([the quick]apple[brown],([fox]banjo,([jumps]cucumber[over the],[really]dogwood)[lazy]eggplant)) rhubarb[dog];"
        # Maps each label token to the comment texts expected alongside it.
        expected_comments = {
            "apple": ["the quick", "brown"],
            "banjo": ["fox"],
            "cucumber": ["jumps", "over the"],
            "dogwood": ["really"],
            "eggplant": ["lazy"],
            "rhubarb": ["dog"],
        }
        expected_tokens = [
            "(",
            "apple",
            ",",
            "(",
            "banjo",
            ",",
            "(",
            "cucumber",
            ",",
            "dogwood",
            ")",
            "eggplant",
            ")",
            ")",
            "rhubarb",
            ";"
        ]
        src = StringIO(input_str)
        observed_tokens = []
        tk = nexusprocessing.NexusTokenizer(src=src)
        for token in tk:
            if token in expected_comments:
                expected_comment = expected_comments[token]
                observed_comment = tk.pull_captured_comments()
                self.assertEqual(expected_comment, observed_comment)
                # Remove so we can verify every expected label was seen.
                del expected_comments[token]
            observed_tokens.append(token)
        self.assertEqual(expected_comments, {})
        self.assertEqual(observed_tokens, expected_tokens)
# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()