1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
|
#!/usr/bin/env python
from __future__ import division
from numpy import array, transpose, alltrue
from cogent.util.unit_test import TestCase, main
from cogent.core.moltype import RNA
from cogent.core.sequence import RnaSequence, Sequence, ModelSequence
from cogent.core.alignment import Alignment, DenseAlignment
# Module metadata: authorship, copyright, licence and release information
# for this test module.
__author__ = "Sandra Smit"
__copyright__ = "Copyright 2007-2009, The Cogent Project"
__credits__ = ["Sandra Smit", "Gavin Huttley"]
__license__ = "GPL"
__version__ = "1.4.1"
__maintainer__ = "Sandra Smit"
__email__ = "sandra.smit@colorado.edu"
__status__ = "Production"
class AllTests(TestCase):
    """Checks that Alignment and DenseAlignment behave consistently.

    Every fixture exists in two flavours: an ``Alignment`` built from
    ``RnaSequence`` objects and a ``DenseAlignment`` built from
    ``ModelSequence`` objects over ``RNA.Alphabets.DegenGapped``. The tests
    run the same operation on both and compare the outcome.
    """

    def setUp(self):
        """Build named and unnamed sequences plus the alignments over them."""
        # named sequences
        self.rna1 = RnaSequence('UCAGGG', Name='rna1')
        self.rna2 = RnaSequence('YCU-RG', Name='rna2')
        self.rna3 = RnaSequence('CAA-NR', Name='rna3')
        self.model1 = ModelSequence('UCAGGG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.model2 = ModelSequence('YCU-RG', Name='rna2',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.model3 = ModelSequence('CAA-NR', Name='rna3',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.aln = Alignment([self.rna1, self.rna2, self.rna3], MolType=RNA)
        self.da = DenseAlignment([self.model1, self.model2, self.model3],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

        # seqs no name
        self.nn_rna1 = RnaSequence('UCAGGG')
        self.nn_rna2 = RnaSequence('YCU-RG')
        self.nn_rna3 = RnaSequence('CAA-NR')
        self.nn_model1 = ModelSequence('UCAGGG',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.nn_model2 = ModelSequence('YCU-RG',
            Alphabet=RNA.Alphabets.DegenGapped)
        self.nn_model3 = ModelSequence('CAA-NR',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.nn_aln = Alignment([self.nn_rna1, self.nn_rna2, self.nn_rna3],
            MolType=RNA)
        self.nn_da = DenseAlignment([self.nn_model1, self.nn_model2,
            self.nn_model3], MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

    def test_printing_named_seqs(self):
        """Printing named seqs should work the same on Aln and DenseAln"""
        # Note: the newline trailing the last sequence is intentional,
        # because we want each FASTA-format record to be separated.
        exp_lines_general = ['>rna1', 'UCAGGG', '>rna2', 'YCU-RG',
                             '>rna3', 'CAA-NR']
        exp = '\n'.join(exp_lines_general) + '\n'
        self.assertEqual(str(self.aln), exp)
        self.assertEqual(str(self.da), exp)

    def test_printing_unnamed_seqs(self):
        """Printing unnamed seqs should work the same on Aln and DenseAln"""
        # Unnamed sequences are expected to print as seq_0, seq_1, ...
        # The trailing newline is appended after joining, exactly as in
        # test_printing_named_seqs, instead of hiding it in the last item.
        exp_lines_gen = ['>seq_0', 'UCAGGG', '>seq_1', 'YCU-RG',
                         '>seq_2', 'CAA-NR']
        exp = '\n'.join(exp_lines_gen) + '\n'
        self.assertEqual(str(self.nn_aln), exp)
        self.assertEqual(str(self.nn_da), exp)

    def test_DenseAlignment_without_moltype(self):
        """Expect MolType to be picked up from the sequences."""
        m1 = ModelSequence('UCAG', Alphabet=RNA.Alphabets.DegenGapped,
            Name='rna1')
        m2 = ModelSequence('CCCR', Alphabet=RNA.Alphabets.DegenGapped,
            Name='rna2')
        da = DenseAlignment([m1, m2])
        exp_lines = ['>rna1', 'UCAG', '>rna2', 'CCCR']
        self.assertEqual(str(da), '\n'.join(exp_lines) + '\n')

    def test_names(self):
        """Names should be reported the same way by both alignment types"""
        self.assertEqual(self.aln.Names, ['rna1', 'rna2', 'rna3'])
        self.assertEqual(self.da.Names, ['rna1', 'rna2', 'rna3'])
        # On unnamed sequences the behavior is now the same.
        self.assertEqual(self.nn_aln.Names, ['seq_0', 'seq_1', 'seq_2'])
        self.assertEqual(self.nn_da.Names, ['seq_0', 'seq_1', 'seq_2'])

    def test_seqFreqs(self):
        """seqFreqs should work the same on Alignment and DenseAlignment"""
        # One row per sequence, one column per alphabet symbol. The first 16
        # columns are: 'U', 'C', 'A', 'G', '-', 'B', 'D', 'H',
        #              'K', 'M', 'N', 'S', 'R', 'W', 'V', 'Y'
        # Each row has 17 entries; the last column is presumably the
        # missing-data symbol '?' -- TODO confirm against
        # RNA.Alphabets.DegenGapped.
        exp = [[1,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0],
               [1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0],
               [0,1,2,0,1,0,0,0,0,0,1,0,1,0,0,0,0]]
        self.assertEqual(self.da.getSeqFreqs().Data, exp)
        # This used to raise an error, but now works
        self.assertEqual(self.aln.getSeqFreqs().Data, exp)

    def test_subset_positions_DenseAlignment(self):
        """DenseAlignment column subsets should carry the right data"""
        model1 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        model2 = ModelSequence('YCG', Name='rna2',
            Alphabet=RNA.Alphabets.DegenGapped)
        model3 = ModelSequence('CAR', Name='rna3',
            Alphabet=RNA.Alphabets.DegenGapped)
        sub_da = DenseAlignment([model1, model2, model3],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)

        # Expected alphabet indices: rows are sequences, columns positions.
        full_data = array([[0,1,2,3,3,3], [15,1,0,4,12,3], [1,2,2,4,10,12]])
        sub_data = array([[0,1,3], [15,1,3], [1,2,12]])

        # First check the fixtures themselves
        self.assertEqual(self.da.ArraySeqs, full_data)
        self.assertEqual(self.da.ArrayPositions, transpose(full_data))
        self.assertEqual(sub_da.ArraySeqs, sub_data)
        self.assertEqual(sub_da.ArrayPositions, transpose(sub_data))

        obs_sub_da_TP = self.da.takePositions([0,1,5])
        obs_sub_da_SA = self.da.getSubAlignment(pos=[0,1,5])

        # getSubAlignment: equal to the hand-built subset, data included
        self.assertEqual(obs_sub_da_SA, sub_da)
        self.failIfEqual(obs_sub_da_SA, self.da)
        self.assertEqual(obs_sub_da_SA.ArraySeqs, sub_data)
        self.assertEqual(obs_sub_da_SA.ArrayPositions, transpose(sub_data))

        # takePositions: same expectations. The array data is asserted
        # explicitly rather than trusting alignment equality alone.
        # NOTE(review): earlier comments here questioned whether __eq__
        # compares the underlying data at all -- confirm in DenseAlignment.
        self.assertEqual(obs_sub_da_TP, sub_da)
        self.failIfEqual(obs_sub_da_TP, self.da)
        self.assertEqual(obs_sub_da_TP.ArraySeqs, sub_data)
        self.assertEqual(obs_sub_da_TP.ArrayPositions, transpose(sub_data))

    def test_subset_positions_Alignment(self):
        """Alignment takePositions should return the selected columns"""
        rna1 = RnaSequence('UCG', Name='rna1')
        rna2 = RnaSequence('YCG', Name='rna2')
        rna3 = RnaSequence('CAR', Name='rna3')
        sub_aln = Alignment([rna1, rna2, rna3], MolType=RNA)

        obs_sub_aln = self.aln.takePositions([0,1,5])
        self.assertEqual(obs_sub_aln, sub_aln)
        self.failIfEqual(obs_sub_aln, self.aln)
        # String representations should be the same too; this depends on
        # sequence order being maintained, which
        # test_takePositions_sequence_order checks separately.
        self.assertEqual(str(obs_sub_aln), str(sub_aln))

    def test_takePositions_sequence_order(self):
        """Alignment takePositions should maintain seq order"""
        # DenseAlignment: order kept by getSubAlignment
        self.assertEqual(self.da.Names, ['rna1', 'rna2', 'rna3'])
        sub_da = self.da.getSubAlignment(pos=[0,1,5])
        self.assertEqual(sub_da.Names, ['rna1', 'rna2', 'rna3'])
        # Alignment: order must be kept by takePositions as well
        self.assertEqual(self.aln.Names, ['rna1', 'rna2', 'rna3'])
        sub_aln = self.aln.takePositions([0,1,5])
        self.assertEqual(sub_aln.Names, ['rna1', 'rna2', 'rna3'])

    def test_subset_seqs_Alignment(self):
        """Alignment takeSeqs should select sequences in the given order"""
        rna1 = RnaSequence('UCG', Name='rna1')
        rna2 = RnaSequence('YCG', Name='rna2')
        rna3 = RnaSequence('CAR', Name='rna3')

        sub_aln = Alignment([rna2, rna3], MolType=RNA)
        aln = Alignment([rna1, rna2, rna3], MolType=RNA)
        obs_sub_aln = aln.takeSeqs(['rna2', 'rna3'])

        self.assertEqual(obs_sub_aln, sub_aln)
        self.assertEqual(str(obs_sub_aln), str(sub_aln))

        # Selected sequences should be in the specified order, so asking
        # for them in a different order must print differently.
        obs_sub_aln_1 = self.aln.takeSeqs(['rna3', 'rna2'])
        obs_sub_aln_2 = self.aln.takeSeqs(['rna2', 'rna3'])
        self.failIfEqual(str(obs_sub_aln_1), str(obs_sub_aln_2))

    def test_subset_seqs_DenseAlignment(self):
        """takeSeqs by name should match getSubAlignment by seq index"""
        obs_sub_da_TS = self.da.takeSeqs(['rna1'])
        obs_sub_da_SA = self.da.getSubAlignment(seqs=[0])

        # These two are now the same. Fixed mapping of key to char array.
        self.assertEqual(obs_sub_da_TS, obs_sub_da_SA)
        self.assertEqual(str(obs_sub_da_TS), str(obs_sub_da_SA))

    def test_aln_equality(self):
        """DenseAlignment equality: what does and does not matter"""
        # An alignment equals itself
        self.assertEqual(self.da == self.da, True)

        # one sequence less
        other_da1 = DenseAlignment([self.model1, self.model2],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)
        self.assertEqual(self.da == other_da1, False)

        # seqs in different order -- doesn't matter
        other_da2 = DenseAlignment([self.model1, self.model3, self.model2],
            MolType=RNA, Alphabet=RNA.Alphabets.DegenGapped)
        self.assertEqual(self.da == other_da2, True)

        # seqs in different encoding -- doesn't matter, only looks at data
        other_da3 = DenseAlignment([self.model1, self.model2, self.model3])
        # Should this compare False even though the data is exactly the same?
        # The MolType is different...
        self.assertEqual(self.da == other_da3, True)
        # Use a TestCase assertion (not a bare assert, which is stripped
        # under -O) to confirm that the underlying arrays really agree.
        self.assertEqual(self.da.ArraySeqs, other_da3.ArraySeqs)

    def test_seq_equality(self):
        """ModelSequences with identical data and name should be equal"""
        model1 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        model2 = ModelSequence('UCG', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)
        # The objects themselves compare equal ...
        self.assertEqual(model1, model2)
        # ... and so do their string forms
        self.assertEqual(str(model1), str(model2))

    def test_seq_ungapping(self):
        """degap should strip gaps from RnaSequence and ModelSequence"""
        rna1 = RnaSequence('U-C-A-G-', Name='rna1')
        model1 = ModelSequence('U-C-A-G-', Name='rna1',
            Alphabet=RNA.Alphabets.DegenGapped)

        self.assertEqual(rna1, 'U-C-A-G-')
        self.assertEqual(rna1.degap(), 'UCAG')

        # check it produces the right string from the beginning
        self.assertEqual(str(model1), 'U-C-A-G-')
        # gap characters are expected to be stored as index 4
        self.assertEqual(model1._data, [0,4,1,4,2,4,3,4])
        # ModelSequence.degap is compared via str(), unlike the plain
        # sequence above which is compared directly.
        self.assertEqual(str(model1.degap()), 'UCAG')

    def test_the_rest_of_ModelSequence(self):
        """The class ModelSequence has 14 methods, but only 2 unittests.
        You might want to add some tests there..."""
        # note: mostly these are tested in derived classes, for convenience.
        pass
# Run the test suite only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|