1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
|
import warnings
from unittest import TestCase
import numpy
from numpy import array, dot, empty
from numpy.testing import assert_allclose
from cogent3 import DNA, make_aligned_seqs, make_tree
from cogent3.evolve.ns_substitution_model import (
DiscreteSubstitutionModel,
General,
GeneralStationary,
NonReversibleCodon,
NonReversibleDinucleotide,
NonReversibleNucleotide,
NonReversibleProtein,
NonReversibleTrinucleotide,
StrandSymmetric,
)
from cogent3.evolve.predicate import MotifChange
from cogent3.evolve.substitution_model import TimeReversibleNucleotide
def _make_likelihood(model, tree, results, is_discrete=False):
    """Build a likelihood function for ``model`` on ``tree`` and attach data.

    Parameters
    ----------
    model
        object providing ``make_likelihood_function``
    tree
        passed straight through to ``model.make_likelihood_function``
    results
        mapping whose ``"aln"`` entry is handed to ``set_alignment``
    is_discrete
        when truthy, no ``expm`` keyword is supplied and no parameter
        rules are set

    Returns
    -------
    The likelihood function, after the alignment has been set on it.
    """
    # NOTE (carried over from the original author): the discrete model fails
    # to make a likelihood function if the tree has lengths.
    extra_kwargs = {} if is_discrete else {"expm": "pade"}
    like_fn = model.make_likelihood_function(
        tree, optimise_motif_probs=True, **extra_kwargs
    )

    if not is_discrete:
        # every parameter other than these two gets an independent rule
        # with an upper bound of 5
        untouched = ("length", "mprobs")
        for name in like_fn.get_param_names():
            if name not in untouched:
                like_fn.set_param_rule(name, is_independent=True, upper=5)

    like_fn.set_alignment(results["aln"])
    return like_fn
_aln = {
"Human": "ATGCGGCTCGCGGAGGAGCGGGCCGCGCTC------GCGGCGGAGAACGCGGATGGGGAACCCGGC---GCCGACCGACGACTGCGACTCCTGGGGACCTACGTGGCCATGAGCCTGCGGCCGGCTGCGGGCGCCTGGGAGCGTTGCGCGGGGAGTGCTGAGGCGGAGCAGCTGCTCCAGGCCTTCCTG---GGCCGCGATGCTGCCGAGGGGCCGCGGCCG------CTGCTGGTGGTGCGGCCCGGGCCCAGGGGCCTGGCAATACGCCCCGGGCTGGAGGTGGGACCTGAGTCGGGCCTGGCTGGCGCTAAGGCGCTTTTTTTCCTTCGCACCGGG---CCCGAGCCTCCAGGGCCCGACAGCTTCCGCGGCGCAGTGGTCTGCGGGGACCTGCCCGCGGCACCTCTGGAGCACCTAGCCGCGCTGTTCTCGGAGGTTGTTCTACCCGTCCTGGCCAATGAGAAGAATCGCCTAAACTGGCCCCACATGATATGTGAGGATGTCAGGCGGCACGCCCACAGCCTCCAATGTGACCTCTCAGTTATACTTGAGCAAGTGAAGGGAAAAACTTTGCTGCCTCTTCCAGCAGGCTCAGAAAAAATGGAGTTTGCGGATTCCAAAAGTGAGACAGTCTTGGATTCTATAGATAAGTCAGTCATCTATGCCATTGAGTCTGCAGTGATCAAATGGAGCTACCAAGTCCAGGTGGTACTCAAGAGAGAGTCTTCCCAGCCACTCTTACAAGGGGAGAATCCCACCCCTAAGGTGGAGTTGGAGTTCTGGAAGAGCAGGTATGAAGATCTGAAATACATCTATAATCAACTGAGAACAATAACGGTGAGGGGCATGGCCAAGCTCCTGGACAAGCTTCAGAGTAGCTACTTTCCAGCTTTCAAAGCCATGTACAGAGATGTTGTTGCAGCTCTAGCAGAGGCACAGGACATCCATGTGCACCTGATACCGCTCCAGCGCCACCTGGAAGCTCTGGAGAATGCAGAATTTCCGGAGGTGAAGCCCCAGCTGCGGCCCCTGCTCCACGTGGTCTGTCTGATTTGGGCCACATGCAAGTCCTACCGCTCCCCGGGAAGGCTGACTGTGCTGCTCCAGGAGATTTGCAACCTTCTCATCCAGCAGGCCTCTAATTATCTCAGCCCAGAAGACCTGCTGAGAAGTGAGGTAGAAGAAAGTCAGAGAAAACTGCAAGTGGTCTCAGACACTTTGAGCTTCTTCAAGCAAGAGTTTCAGGACAGAAGGGAGAATCTCCACACTTACTTCAAAGAGAACCAGGAAGTCAAGGAATGGGATTTCCAGTCTTCTTTGGTCTTTGTGCGATTGGATGGCTTCCTGGGACAACTGCACGTGGTGGAGGGTCTTCTGAAGACGGCCCTGGATTTCCACAAACTGGGAAAGGTGGAGTTCAGCGGCGTCAGAGGGAATGCTCTGAGTCAGCAGGTCCAGCAAATGCATGAAGAATTTCAAGAGATGTACAGGCTTCTCTCAGGATCCTCCTCCGACTGCCTGTACCTCCAAAGCACGGACTTTGAAAATGACGTCTCTGAATTTAACCAGAAAGTAGAAGATCTTGACCGAAGATTGGGGACTATCTTTATTCAAGCTTTTGATGATGCACCTGGCTTGGAGCATGCCTTTAAGCTGCTAGACATAGCAGGAAACCTCCTTGAAAGACCGCTGGTAGCGAGGGATACATCTGATAAATACCTGGTCCTCATCCAAATGTTCAACAAAGATCTGGATGCAGTGAGGATGATCTACAGTCAGCACGTCCAGGAGGAAGCAGAACTTGGGTTCTCCCCGGTGCACAAGAACATGCCCACCGTGGCTGGCGGCCTCCGCTGGGCACAGGAGCTGAGGCAGCGCATCCAGGGTCCTTTCAGCAACTTTGGACGCATCACACACCCTTGCATGGAATCTGCAGAAGGAAAGCGAATGCAACAAAAATATGAAGATATGCTGTCATTGCTAGAAA
AGTATGAGACAAGACTTTATGAGGATTGGTGCCGGACAGTATCAGAGAAGTCACAGTACAATCTTTCCCAACCACTTCTAAAACGTGACCCAGAGACGAAGGAGATCACTATCAACTTTAACCCACAGCTGATTTCAGTGCTGAAAGAAATGAGCTATCTTGAACCCAGAGAGATGAAACACATGCCTGAGACAGCAGCAGCCATGTTCTCCTCCAGGGATTTCTATCGGCAGCTTGTGGCTAATTTAGAGTTGATGGCAAATTGGTACAACAAGGTTATGAAAACTCTGCTGGAGGTGGAATTTCCATTAGTGGAGGAAGAGCTGCAAAATATTGATCTCCGCCTCAGAGCAGCAGAGGAGACTTTGAACTGGAAAACAGAAGGCATTTGCGATTATGTCACTGAAATCACCAGTAGTATTCATGATCTTGAACAAAGAATTCAGAAAACTAAAGACAATGTGGAAGAGATCCAAAACATCATGAAAACATGGGTGACTCCAATATTTAAGACAAAAGATGGAAAAAGGGAATCCCTTCTTTCTCTGGATGATCGGCATGATCGAATGGAAAAATATTACAATCTCATCAAGGAATCTGGCCTTAAGATCCACGCCCTTGTTCAGGAAAACCTGGGTCTATTTTCAGCAGACCCAACCTCCAATATCTGGAAGACTTATGTTAACTCTATTGACAATTTGTTGCTGAATGGATTCTTTCTTGCCATTGAGTGCTCCCTCAAGTATCTTCTGGAAAATACTGAGTGTAAGGCAGGACTTACCCCAATATTTGAAGCACAACTGAGTCTAGCCATCCCAGAGCTAGTTTTCTATCCGTCTCTGGAGTCTGGAGTGAAGGGGGGTTTCTGTGACATTGTTGAGGGTCTCATCACCAGCATTTTTAGGATACCATCTCTGGTGCCACGGCTTTCCCCACAAAATGGCTCTCCTCACTATCAGGTCGACCTGGACGGTATACCAGATTTGGCAAACATGCGGCGCACACTCATGGAGAGAGTCCAGAGAATGATGGGCCTCTGCTGTGGCTATCAGAGCACCTTCAGCCAGTATTCGTACCTCTATGTGGAGGACCGGAAGGAGGTTCTGGGTCAGTTTCTGCTGTACGGGCACATCCTCACTCCGGAAGAAATTGAAGACCATGTGGAAGATGGCATCCCAGAGAACCCTCCCCTCCTTTCTCAGTTTAAAGTGCAAATCGACTCCTATGAAACGCTCTATGAAGAGGTGTGCAGGCTGGAACCCATCAAGGTGTTTGACGGCTGGATGAAAATTGATATTCGACCCTTTAAGGCATCTCTGCTGAATATTATTAAGAGGTGGAGCCTCCTGTTCAAACAGCATCTTGTGGACCACGTCACTCACAGCTTGGCCAACCTGGATGCGTTTATAAAGAAGAGTGAGAGCGGCTTACTCAAGAAAGTTGAAAAAGGAGATTTCCAAGGCTTGGTTGAGATCATGGGACACCTTATGGCTGTTAAAGAACGGCAGAGTAACACTGATGAGATGTTTGAGCCCTTAAAGCAGACTATTGAATTGCTGAAGACCTATGAACAAGAATTGCCAGAAACAGTGTTTAAGCAGCTGGAGGAGCTGCCTGAGAAATGGAACAACATAAAAAAGGTGGCCATTACTGTGAAGCAGCAGGTGGCCCCACTGCAGGCAAATGAAGTGACACTCCTCCGCCAGAGGTGCACAGCCTTCGATGCAGAACAGCAGCAATTCTGGGAGCAATTCCACAAAGAAGCCCCGTTCAGGTTTGATAGCATCCACCCTCATCAAATGCTGGATGCCAGGCACATCGAGATCCAGCAGATGGAATCCACTATGGCCTCCATTTCTGAGTCTGCCAGCTTATTTGAAGTCAATGTCCCTGACTATAAGCAGCTGAGGCAGTGCAGGAAGGAGGTCTGCCAGCTGAAGGAGCTCTGGGACACCATTGGAATGGTGACCTCCAGCATCCATGCCTGGGAGACCACACCC
TGGAGGAATATCAACGTGGAAGCCATGGAGTTGGAGTGCAAACAGTTTGCCCGGCATATCCGAAACCTGGACAAGGAGGTCAGGGCCTGGGATGCATTCACAGGCCTGGAAAGCACTGTGTGGAACACGCTGAGCTCCCTGAGGGCAGTAGCTGAGCTGCAGAATCCAGCCATCCGGGAGCGGCACTGGAGGCAGCTGATGCAGGCCACCGGTGTGAGCTTCACTATGGACCAGGACACCACCCTAGCGCACCTGCTGCAGCTCCAGCTGCACCACTATGAGGATGAGGTCCGGGGCATTGTGGACAAAGCTGCAAAAGAGATGGGTATGGAGAAAACCTTAAAGGAGCTGCAGACTACCTGGGCTGGCATGGAATTCCAGTATGAGCCCCACCCACGGACCAATGTCCCCCTCCTGTGCTCTGATGAGGACCTCATAGAGGTTCTGGAGGATAATCAAGTTCAACTTCAGAACCTGGTGATGTCCAAGTATGTTGCTTTCTTCTTGGAGGAGGTGTCGGGCTGGCAGAAGAAGCTGTCCACAGTGGACGCTGTCATCTCTATCTGGTTTGAAGTGCAGCGAACATGGACTCACCTGGAAAGCATATTCACTGGATCTGAAGATATTCGGGCACAGCTACCCCAGGATTCTAAAAGGTTTGAAGGCATCGACATTGACTTTAAAGAGCTAGCTTATGATGCCCAGAAAATTCCAAATGTAGTGCAAACCACCAACAAGCCAGGCCTGTATGAAAAGCTGGAGGATATTCAGGGCAGATTGTGCCTGTGTGAGAAGGCCCTGGCAGAGTACCTCGACACCAAGAGGCTTGCCTTCCCGCGGTTTTACTTTCTCTCCTCCTCCGATCTGTTAGACATCCTTTCCAACGGCACAGCTCCACAACAGGTTCAACGTCACCTTTCCAAACTCTTTGACAACATGGCCAAGATGCGATTCCAGCTAGATGCCAGTGGGGAACCAACCAAGACAAGCCTCGGCATGTACAGCAAAGAAGAGGAGTATGTGGCTTTCAGTGAGCCCTGTGACTGCAGCGGGCAGGTAGAAATATGGCTGAACCATGTCCTTGGTCACATGAAGGCCACTGTGAGGCATGAGATGACAGAAGGTGTAACTGCCTATGAAGAAAAGCCGAGGGAGCAGTGGCTTTTTGACCACCCAGCTCAGGTGGCCCTGACCTGTACTCAGATCTGGTGGACAACAGAAGTGGGCATGGCATTTGCCAGGCTGGAGGAAGGCTATGAGAGTGCCATGAAGGACTATTATAAGAAGCAAGTGGCCCAGCTCAAAACCCTTATCACCATGCTGATTGGCCAGCTCTCCAAGGGAGACCGGCAGAAGATTATGACTATATGCACCATCGATGTGCATGCCCGGGATGTGGTAGCCAAGATGATTGCTCAGAAGGTAGACAATGCCCAGGCTTTCCTCTGGCTGTCTCAGCTGCGCCATCGTTGGGATGACGAGGTCAAACACTGCTTTGCCAACATCTGTGATGCCCAGTTTTTGTATTCCTATGAGTACCTGGGAAACACACCTCGCTTGGTGATCACACCTTTGACT------GACAGGTGCTACATCACCCTCACCCAGTCCCTGCACCTGACCATGAGTGGGGCTCCCGCAGGACCTGCAGGCACAGGCAAGACCGAGACCACCAAGGACCTGGGCCGCGCACTGGGCATCCTGGTCTATGTGTTCAACTGCTCGGAGCAGATGGATTACAAGTCTTGTGGCAACATCTACAAAGGCCTTGCTCAGACTGGTGCCTGGGGCTGCTTTGATGAGTTTAATCGAATCTCCGTGGAGGTCTTGTCAGTGGTGGCAGTGCAGGTAAAAAGCATTCAAGATGCGATTAGAGATAAGAAGCAGTGGTTCAGCTTCCTTGGGGAGGAGATCAGCCTGAATCCTTCTGTCGGTATCTTCATCACCATGAACCCAGGCTATGCTGGCCGCACAGAGCTGCCAGAGAATCTCAAGTC
TCTCTTCAGGCCTTGTGCAATGGTGGTTCCAGACTTTGAATTGATCTGTGAAATCATGCTGGTGGCAGAAGGATTCATTGAAGCCCAGTCATTAGCCAGAAAGTTCATCACTCTTTACCAGTTGTGCAAAGAGCTTCTCTCCAAACAGGATCACTACGACTGGGGCCTACGGGCCATCAAGTCCGTGCTGGTGGTGGCAGGATCCCTGAAGAGAGGAGACCCTGACCGGCCTGAGGACCAGGTCCTGATGCGCTCCTTGCGGGATTTCAACATCCCCAAGATTGTGACTGATGACATGCCCATCTTCATGGGCCTGATCGGGGACCTCTTTCCCGCCCTGGATGTCCCCCGGAGGAGAGACCCCAACTTCGAAGCTTTGGTTAGGAAGGCGATAGTGGATCTGAAGCTCCAGGCTGAGGACAACTTTGTGCTCAAGGTGGTCCAGCTGGAGGAGCTCCTGGCTGTGCGGCACTCTGTA---TTTGTGGTGGGTGGCGCTGGT------ACCGGCAAGTCACAGGTGCTGAGGTCCTTGCACAAGACCTATCAG------ATCATGAAACGGCGCCCCGTCTGGACTGACCTCAATCCCAAAGCAGTCACAAATGATGAGCTCTTT---------------------------------------GGCATCATCAATCCAGCCACAGGAGAATGGAAGGATGGA---TTGTTCTCTTCCATCATGCGGGAGCTTGCCAACATCACCCATGATGGGCCCAAGTGGATTTTACTGGATGGCGACATAGATCCAATGTGGATTGAATCCCTGAATACTGTCATGGATGATAACAAGGTGCTGACATTGGCCAGCAATGAGAGGATTCCTCTGAACCCCACCATGAAGCTCCTCTTTGAGATCAGCCACCTGCGCACAGCCACTCCAGCAACTGTCTCTAGAGCAGGGATCTTGTACATCAACCCGGCAGACTTGGGATGGAACCCTCCAGTGAGCAGCTGGATTGAGAAGAGGGAAATCCAGACAGAGAGAGCCAACTTAACCATTTTGTTCGACAAGTATCTTCCAACCTGCCTAGACACACTCAGAACCAGGTTTAAGAAGATCATTCCCATCCCAGAGCAGAGCATGGTTCAGATGGTGTGTCACCTTCTGGAATGTCTCCTGACCACGGAGGACATCCCTGCAGACTGCCCTAAGGAAATTTATGAGCATTATTTTGTGTTTGCTGCCATCTGGGCTTTCGGCGGAGCAATGGTCCAAGATCAGCTTGTGGACTACCGGGCAGAGTTCAGCAAATGGTGGCTGACTGAGTTCAAAACAGTCAAGTTTCCTTCCCAAGGAACCATCTTTGACTATTACATCGACCCAGAGACCAAGAAATTCGAGCCTTGGTCCAAGCTCGTCCCCCAGTTCGAATTTGACCCCGAGATGCCCTTGCAGGCGTGTTTGGTGCACACGAGTGAGACCATCCGTGTGTGCTACTTCATGGAGCGGTTGATGGCGCGGCAGCGGCCTGTCATGCTGGTGGGCACGGCTGGCACTGGCAAGTCGGTGCTGGTGGGAGCTAAGCTGGCCAGCCTTGACCCCGAGGCATACCTGGTGAAAAACGTGCCATTCAACTACTACACCACGTCAGCAATGCTGCAGGCTGTCCTGGAGAAGCCTCTGGAAAAGAAGGCTGGCAGAAACTATGGCCCTCCAGGGAACAAGAAACTCATCTATTTCATTGATGACATGAACATGCCTGAGGTGGATGCCTACGGGACGGTGCAGCCCCACACCATCATCCGGCAGCATCTGGACTATGGCCACTGGTATGATCGGAGCAAGCTGTCCCTAAAGGAGATCACAAATGTACAGTATGTTTCCTGTATGAACCCCACGGCAGGCAGCTTCACCATCAACCCCCGGCTTCAGCGTCACTTCAGCGTGTTTGTCCTCTCCTTCCCGGGGGCAGATGCCCTGTCCTCTATCTACAGCATCATCCTCACTCAGCATCTGA
AGCTCGGAAACTTCCCGGCGTCCCTGCAGAAATCCATCCCCCCACTGATCGATCTGGCCCTCGCCTTCCACCAGAAAATTGCTACCACCTTCCTACCCACAGGAATCAAATTCCACTACATCTTCAACCTCAGAGATTTTGCCAACATTTTCCAGGGCATTCTCTTCTCCTCAGTGGAATGTGTGAAATCCACATGGGATCTTATAAGGCTCTATCTGCATGAATCAAATCGAGTTTATCGGGATAAGATGGTAGAAGAAAAGGACTTTGATCTTTTTGATAAAATCCAGACAGAAGTGCTCAAGAAAACTTTTGATGATATTGAAGACCCTGTGGAGCAGACCCAAAGCCCGAACCTGTATTGTCACTTTGCAAATGGTATTGGGGAGCCCAAATACATGCCTGTACAGTCTTGGGAACTTTTGACCCAGACTCTGGTGGAGGCCTTGGAGAACCACAATGAAGTCAACACAGTGATGGACCTAGTTCTCTTTGAGGATGCCATGCGCCATGTCTGCCATATCAATCGCATCTTGGAGTCCCCGCGGGGAAATGCTCTGCTGGTTGGTGTAGGTGGGAGCGGCAAGCAGAGCCTGACAAGGCTGGCAGCTTTCATCAGCTCCATGGATGTCTTCCAGATCACACTGCGCAAAGGCTACCAGATCCAGGACTTCAAGATGGACCTGGCCAGCCTGTGTCTGAAAGCTGGAGTGAAGAATCTCAACACAGTGTTTCTCATGACTGATGCCCAAGTGGCTGATGAGAGGTTCCTTGTGCTCATCAATGATCTTTTGGCATCTGGGGAGATCCCAGATCTCTACTCTGATGATGAAGTTGAAAACATCATAAGCAATGTGAGGAATGAAGTCAAGAGCCAGGGTCTGGTTGACAACAGAGAGAACTGTTGGAAGTTCTTTATAGATCGGATCCGGCGACAGCTGAAGGTGACTCTCTGTTTCTCCCCTGTGGGAAACAAGCTAAGAGTCCGCAGCAGGAAGTTCCCAGCCATTGTGAACTGCACAGCCATCCACTGGTTCCACGAGTGGCCTCAGCAAGCATTGGAGTCTGTCAGCCTCCGCTTCTTGCAGAACACAGAGGGCATTGAGCCCACAGTAAAGCAGTCGATTAGCAAATTCATGGCCTTTGTCCACACAAGTGTCAACCAAACATCCCAGTCTTATCTGAGCAATGAACAGCGCTACAACTATACAACTCCCAAGTCCTTTCTGGAGTTCATCAGACTCTACCAGAGCTTGTTGCACAGGCACAGAAAAGAGCTCAAGTGCAAGACAGAGCGGTTGGAGAACGGGCTGCTGAAGCTGCATAGCACCTCTGCCCAGGTGGATGATCTGAAAGCAAAGCTGGCTGCCCAGGAAGTAGAGCTGAAGCAGAAAAATGAAGATGCAGACAAACTGATTCAGGTCGTGGGTGTGGAGACTGACAAAGTGAGCAGAGAGAAAGCCATGGCAGATGAAGAGGAGCAGAAGGTGGCCGTCATCATGCTAGAGGTGAAACAGAAGCAGAAGGACTGTGAGGAGGACCTGGCAAAGGCTGAGCCAGCACTCACAGCAGCGCAGGCAGCTCTCAACACCCTGAACAAGACCAACCTGACAGAGCTGAAGTCATTTGGCTCTCCGCCTCTGGCCGTCAGCAATGTCAGCGCTGCGGTGATGGTACTGATGGCTCCCAGGGGTAGGGTGCCCAAGGACCGGAGCTGGAAGGCTGCTAAGGTCACCATGGCCAAAGTGGATGGCTTCCTGGACTCGCTAATAAACTTCAACAAAGAGAACATTCACGAGAACTGCCTCAAAGCCATCAGGCCGTATCTGCAAGACCCCGAGTTCAATCCTGAGTTTGTGGCCACCAAATCCTATGCGGCTGCAGGCCTCTGCTCCTGGGTCATCAATATTGTGAGATTTTATGAGGTGTTCTGTGATGTGGAACCCAAGCGCCAGGCACTGAACAAAGCCACCGCGGACCTCACAGCTGCCCAGGAGAAG
CTGGCTGCCATCAAAGCCAAGATCGCTCACCTTAATGAAAACCTGGCAAAGCTCACAGCCAGGTTTGAGAAAGCAACAGCAGACAAACTCAAATGTCAGCAAGAAGCCGAAGTGACCGCAGTCACCATCTCCCTTGCCAACCGCCTGGTTGGAGGACTCGCTTCTGAAAACGTGAGGTGGGCAGATGCCGTGCAGAACTTCAAACAGCAGGAAAGGACGTTATGTGGAGACATTTTACTTATAACGGCTTTCATTTCCTACCTTGGCTTCTTCACAAAGAAATACCGGCAGAGCCTCCTGGACAGAACTTGGAGGCCCTACCTGAGCCAGCTGAAAACTCCCATTCCAGTCACCCCAGCCCTGGATCCCCTGAGGATGCTGATGGATGATGCTGACGTGGCTGCCTGGCAGAACGAGGGCCTCCCAGCCGACCGCATGTCCGTGGAGAATGCCACCATTCTCATCAACTGTGAGCGCTGGCCACTCATGGTTGACCCTCAGCTACAAGGCATCAAATGGATCAAGAATAAATATGGTGAAGATCTCCGGGTCACGCAGATTGGTCAGAAAGGCTACCTTCAAATCATAGAGCAGGCCCTGGAAGCTGGAGCTGTGGTGCTGATTGAAAATCTAGAGGAGTCCATTGATCCTGTTCTGGGACCCCTGCTTGGGAGAGAAGTCATTAAAAAAGGACGATTCATTAAAATTGGAGACAAAGAATGTGAATACAATCCCAAGTTCCGGCTCATCCTCCACACCAAGCTGGCTAATCCTCACTACCAGCCTGAGCTGCAGGCTCAGGCCACCCTGATCAACTTCACCGTGACCAGGGATGGCCTGGAGGACCAGTTGCTGGCCGCTGTGGTCAGCATGGAGAGGCCAGACTTGGAGCAGCTGAAGTCCGATCTCACAAAGCAGCAGAATGGATTCAAAATTACCCTGAAAACGTTGGAAGACAGTCTTCTCTCTCGCCTCTCCTCCGCCTCTGGGAACTTCCTGGGAGAAACAGTGCTGGTGGAAAACCTAGAGATCACCAAGCAGACTGCTGCCGAAGTTGAGAAAAAGGTCCAGGAGGCCAAGGTGACTGAAGTGAAAATCAACGAGGCCCGAGAGCACTACCGGCCAGCAGCTGCCAGGGCCTCACTGCTCTACTTCATCATGAACGACCTCAGCAAGATCCATCCAATGTACCAGTTTTCTCTCAAGGCCTTCAGTATCGTCTTCCAGAAGGCTGTGGAG------AGGGCTGCTCCTGACGAAAGCCTCAGGGAGCGGGTGGCCAACCTAATAGACAGCATAACCTTCTCTGTGTACCAGTACACCATCCGCGGGCTCTTTGAGTGTGATAAGCTGACCTACCTTGCCCAGCTCACCTTTCAGATTCTCCTCATGAACCGAGAAGTCAATGCAGTGGAGTTGGATTTCCTGCTTCGATCTCCAGTGCAGACGGGCACCGCCAGCCCCGTGGAGTTCCTCTCCCATCAGGCGTGGGGAGCTGTCAAGGTACTTTCATCAATGGAAGAATTCTCTAATCTGGATCGGGACATAGAGGGATCTGCTAAGAGCTGGAAAAAGTTTGTGGAGTCCGAATGTCCTGAGAAAGAGAAGCTCCCACAGGAGTGGAAGAACAAGACAGCCCTGCAGCGCCTCTGCATGCTGAGAGCCATGCGGCCCGACCGGATGACCTATGCTTTGCGAGATTTTGTTGAAGAGAAGTTAGGAAGCAAATACGTGGTGGGAAGAGCCCTAGATTTTGCAACCTCATTTGAAGAATCGGGACCAGCCACTCCTATGTTTTTCATCCTGTCTCCAGGGGTGGACCCACTGAAGGATGTAGAAAGTCAAGGAAGAAAACTTGGATACACCTTCAACAATCAGAACTTTCACAACGTGTCTTTGGGGCAAGGACAGGAAGTGGTGGCTGAGGCTGCGCTGGACCTCGCTGCCAAGAAAGGTCACTGGGTTATTTTGCAGAACATTCACCTGGTGGCCAAGTG
GCTCAGCACCCTGGAGAAGAAGCTGGAGGAGCACAGTGAGAACAGCCACCCAGAGTTCAGGGTCTTCATGAGTGCAGAGCCAGCACCCTCCCCTGAGGGCCACATCATCCCCCAGGGCATCCTGGAGAACTCCATTAAGATCACCAATGAGCCCCCCACGGGCATGCATGCCAACCTGCACAAGGCCCTGGACAACTTCACTCAGGACACTCTGGAGATGTGTTCTCGGGAGACGGAGTTTAAGAGCATCCTCTTTGCTCTTTGTTACTTCCATGCGGTGGTGGCAGAAAGACGAAAATTTGGGCCCCAGGGATGGAATCGCTCATACCCCTTTAACACTGGAGACCTCACTATCTCTGTGAATGTCCTCTACAACTTCCTGGAGGCCAACGCAAAGGTCCCCTATGATGATTTGCGCTACCTGTTTGGAGAGATCATGTATGGAGGCCATATCACAGATGACTGGGACAGAAGACTCTGCAGAACCTACCTGGGGGAATTCATTCGACCAGAAATGTTAGAAGGAGAACTGTCTTTGGCCCCAGGGTTCCCACTCCCAGGCAACATGGACTACAATGGTTATCATCAGTACATCGATGCTGAGCTGCCCCCAGAATCCCCCTACCTCTATGGCCTCCACCCGAACGCAGAGATTGGCTTCCTGACCCAAACCTCAGAAAAGCTCTTCCGCACTGTGCTGGAGCTGCAGCCTCGGGACAGCCAGGCCAGAGACGGAGCGGGCGCCACAAGAGAAGAAAAGGTCAAGGCACTTCTGGAAGAAATATTGGAGCGGGTGACAGACGAGTTTAACATCCCAGAACTGATGGCCAAAGTGGAGGAGCGCACCCCTTACATTGTAGTTGCCTTCCAGGAGTGTGGCCGGATGAATATCCTCACCAGAGAGATTCAGCGCTCACTGAGGGAGCTGGAGCTCGGCTTAAAGGGGGAGCTGACTATGACCAGCCACATGGAGAACTTACAGAATGCCCTGTACTTCGATATGGTGCCAGAGTCCTGGGCTAGACGAGCCTACCCTTCCACAGCAGGCCTGGCAGCCTGGTTTCCAGACCTCCTCAACAGAATCAAGGAGCTAGAGGCTTGGACGGGTGACTTTACAATGCCCTCCACTGTGTGGCTGACAGGCTTCTTCAACCCCCAGTCGTTCCTGACTGCCATCATGCAGTCCACGGCTCGCAAGAATGAGTGGCCACTGGACCAGATGGCCCTGCAATGTGACATGACGAAGAAGAACAGAGAAGAGTTTAGGAGTCCTCCTCGGGAAGGGGCCTACATCCATGGCCTCTTCATGGAAGGTGCCTGCTGGGACACACAGGCTGGGATCATTACAGAGGCAAAGCTGAAGGATCTGACACCCCCTATGCCTGTGATGTTCATCAAGGCCATTCCTGCAGATAAGCAGGACTGCCGCAGTGTCTATTCCTGTCCTGTGTACAAGACTAGTCAGCGGGGACCCACCTACGTGTGGACTTTCAACCTGAAGACTAAGGAAAACCCATCCAAGTGGGTTCTGGCTGGAGTAGCCTTGCTTCTCCAGATT",
"Mouse": "ATGCCCGGCGCCAAGGAGCAGGCAGCGCTG---------GCGGAGTCTGGGGACGAGGAGCCTGGA------GACCCGAGGCTGCGGCTTCTGGGGACTTTTGTGGCTCGGAGCCTGCGTCCGGCCGCGGGCACCTGGGAGCGCTGTGCAGGCACAGCCGAGGCGGGGAGGCTGCTGCAGGCCTTCCTG---GATCACAACGCTGCCTCGGATCCGCGGCCA------CTGCTGGTGGTTCAGTCCGGGCCCGGGGGCCTGGTGGTGACACCCGGTCTAGACGCAGGACCAGAGCCCAGCCGAGCTCGCGCCAAGGGGCTCTTTTTCTTGCGCACTAAG---TCCGAGCCTCCGGGAAATCACAGCCTCCGCGGCACGGTGCTCTGCGGGGACCTACCCGCGGTGCCACTGGAGCACCTGGCCCCGCTGCTCTCAGAGGTCATTATTCCTGTCCTGGCAAATGAAAAGAACCATTTAGAATGGCCCCACATGGTATGTCAAGACATCAGACATCATGCCCACACCCTGAAGTCTGACCTCCTAGTGATCTTTGAGCACATGAAGGGGAGAACCTTGCTGCCTCTTCCAGTTGGCTCAGAAAAACTGGAGTTTGTGGATGGCCACAGTGAGCCAGTCTCAGATGCCATAGACAAGTCAACTCTCTATGCTGTGGAGTCTGCAGTGATCAAATGGAGCCACCAAGTCCAGGTGGTACTCAAGAGGGAGTCTTCTCAGGCACTCATACAAGGACAAAATCCCACCCCCAAGGTGGAGCTGGAGTTCTGGAAGAGCAGGTGTGAGGACCTGGAACACATTTATAATCAACTAATGACAATCAAGGTGAAGGGAATGGCTGAACTCCTGGACAAACTTCAGAGCAGCTACTTGCCAGCTTTCAAAGCCATGTTCAGAGACGTTGAAGCAGCCCTGACCGAGGCCCAGGACATCCATGTGCACCTGTTACCTCTCCAGCAACACCTGGACATCCTGGAAAACGTGGAGTTTCCCAAGGTGAAGGGCAGGCTGCGGCCTCTGCTCCATGTGGTCTGTCTGATTTGGGCCACCTGCAAATGGTACCGTTCCCCTGGGAGGCTCACAGTGCTGCTCCAAGAAATCTGCAACCTCCTCATCCAGCAGGCCTCTAATTACCTCAGCCCAGAAGACCTCCTGAGAAGTGAGGTGGAAGAGAGTCAGAAAAAACTGCAAGTGGTCTCAGATACCTTAAGCTTCTTCAAACAGGCATTCCAGGACAGAAGGGAGCACCTCCACACTTACTTCAAGGAGGATTCTGAAGTCAGGGTGTGGGATTTCCAAGCATCTCTGGTGTTTGTGCGACTGGATGGCTTTCTGGGCCGAGTGCACATGGTGGAGGATCTTCTGAAGACAGCCTTGGATCTCAACAATCTGGAAAAGCTTGAGTTCAGTGGCCTCAGAGGAAACTCCCTGAGTCAGAAAGTCCAACGCATGCATGAGGAATTTGAGGAGATGTACAAGGTCTTCTTGGACTGCTCCTATGACTGTTTGGACCCCAAGGGCACGGAATTTGAAAATGATGTCTGTGAGTTTAACAAAAGAGTGGAAGATCTTGACCGGAGACTGGGGACTATCTTAATTCAAGCTTTTGATGATGCACCTGATGTGGAACATGCCTTTAAGCTACTGGACATCACAGGAACCCTCATCAAAAGACCCCTGGTAGCACAGGATGTATCACAAAAATACCTGGCCCTCATCCGAATGTTCAGCACAGAACTGGATGCTGTGAGGGTCATCTACAGTCAGCACATCCAGAAGGAGGCAGAGCATGGATTCTCCCCCGTGCACAAGAACATGCCCACTATGGCTGGCGGCATCTGCTGGGCACAGGAACTGAGGCAGCGCGTCAAGGGTCCCTTTGGCAACTTCAAAAACATACCACATCTGTACTTGCAATCTGCTGAAGGAAAGCGAATGATACAAAAATACGAAGACTTGCTCTCCCTGCTAGAAG
AGTATGAGAGAAGACTTTATGAGGACTGGTGTCAGACGGTATCTGAAAAGTCACAGTACAATCTTTCCCTACCTCTTTTGCATCGTGACCCCAACACAAAGCAGCTCTCTGTCAACTTTAACCCACAGCTGATTTCAGTGTTGAAAGAAATGAACTATCTTCAGCCCAGTGAGGTGAAAACCATCCCCGAGACCGCAGCAGCCATGTTCTCCTCCAGGGAATTCTATCGTCAGCTTGTGGCCAACTTGGAGTTGATGGCAAATTGGTACAACAAGGTTATAAAAATTCTGCTGGAGGTGGAATTTCCACTAGTGGAGGAAGAACTGCAAAATATTGATCTCCGCCTGAGAGCTGCAGAGGAGACTCTGAGCTGGAAAACAGAAGGCATTTGGGATTATGCTATGCAAATAACCAATAGCATTCATGACCTGGAACAAAGAATTCAGAAGACAAAAGACAATGTGGAAGAGATTCAAAACATCATGAAAACATGGGTGTCTCCAATATTCAAGAGAAAAGATGGGAAAAAAGAATGGCCCCTTTCTCTGGATGATCAGCAGGATCACATGGAAAAATACTACAGTCTCATCCAGGAATCTGGCCTTAAGATTCACGCTCTTGTTCAGGAAAACCTGGTTCTGTTTGCAGCAGACCCAGCATCCAGCATTTGGAAGTCTTACGTGAACTACATTGATTCCATGTTGTTGGATGGATTTTTTCTTGCCATTGAGTGTTCTCTCAAATATCTATTGGAAAACACTGAATGCAAGCCTGGACTCACCCCAGTATTTGAAGCACAGCTCAACCTTGTCACCCCAGAATTAGTTTTCCACCCCTCTCTGGACTCTGGGGTAAAGGGAGGCTTATATGACATTGTCCAGAGTCTTGTCACCAGAATTTTTGCTATGCCATCCCTCGTGCCACGGCTTTCCCCACACAGTGGCTCTCCTCACTATCAGGGTGACCTAGAGGACATGGCCGACTTAGCTGGTCTTCGGAGTGTGCTCATGGAGAGGGTACAGAATATGATGACCCTCTGCTGTGGCTATAGAAATACCCTCAGCCAGTATTCTTACCTGTATGTGGAGGATAGGAAGGAGATTCTTGGTCAGTTTCTGCTCTATGGGCATGTCCTCACACCTGAAGAGATAGAAGCCCATGCCGAAGACGGCATTCCAGAAAATCCACCCCTCCTCCATCACTTCAAAGACCAGATAGACTCCTATGAAAAGCTCTATGAGGAGGTGGTCAGCCTGGAACCCACCAAGGTGTTTGATGGCTGGATGCGAGTGGATGTGAGACCCTTCAAAGCATCTCTGCTGAACACAATAAAGAAGTGGAGCCTCATGTTCAAGCAACATCTTGTTGACTTTGTCACAAACAGCCTGTCTGACCTTGACTCATTCATAAGGAGCACCGAGAGTGGTTTGCTCAAGAGGGTGGAGAAAGGAGATTTCCAAGGATTGGTTGAGATCATGGGACATCTTGTCACCCTTAAAGAACGGCAGAGCAGCACCGATGACATGTTTGAGCCCCTGAAGCAAACGATTGAACTGCTGAAGTCCTACGAACAAGAGCTGCCAGAAACCGTGTTTAAGCAACTGGAGGAGCTTCCTGAGAAGTGGAAGAACATGAAGAAGATGGCCATCACTGTGAGGCAACAGGTGGCCCCTCTGCAGGCAAATGAAGTGGCCCTACTCCGCCAGCGGTGCTCAGCCTTCGATGATGAGCAGCAGCAATTCCAGGAGAGGTTCCGCAAAGAGGCCCCTTTCAGGTTTGATAGTATCAATCCACACCAAATGCTGGATGCCTGGCACGTGGAGATCCAGCACATGGAATCCACCATGGCAACCATCTCTAAGTCGGCTGATTTGTTTGAAGTCAATGTTCCTGACTACAAGCAGCTGAGGCAGTGCAGGAAGGAGGCCTGCCAGTTAAAGGAGCTCTGGGACACCATTGGAATGGTGACCTCCAGCATCCGTGCCTGGGAGGCCACCAGC
TGGAGGAATATCAGTGTGGAAGCCATGGACTCAGAGTGCAAGCAGTTCGCCCGGCACATCCGCAACCTAGATAAGGAGTTCAGGTCCTGGGATGCATTCACGGGCCTGGAAAGCACAGTGTTGAACACCCTGACGTCCCTGAGGGCTGTGGCGGAGCTGCAGAATCCTGCCATCCGGGATCGGCACTGGAGGCAGCTGATGCAGGCCACCGGGGTGAATTTCACCATGAATCAGGATACCACCTTAGCTCATCTCCTGCAGCTTCAGCTCCACCACTTCGAGGATGAGGTCCGAGGCATTGTGGACAGAGCTGTCAAAGAGATGAGTATGGAGAAGACCTTAAAGGAATTGCAGACTACCTGGGCCAGCATGGAATTCCAGTACGAGTCCCACGCAAGAACCCGCGTACCCTTGCTGCAGTCAGATGAGGATCTCATTGAGGTCCTAGAGGACAATCAAGTGCAACTTCAGAACCTGATGATGTCCAAATATGTTGCTTTCTTCCTGGAAGAAGTGTCGAGCTGGCAGAAGAAGCTGTCCACGGCTGACTCGGTCATCTCTATCTGGTTTGAGGTGCAGCGCACCTGGTCTCACCTAGAGAGCATATTCATTGGCTCAGAAGATATCCGGGCTCAGCTACCCCAGGACTCTAAGAGATTTGAAGGCATTGACTCTGACTTCAGAGAGCTGGCGTATGATGCTCAGAAAACCCCAAATGTGGTGGAAGCCACAAATAAGTCAGGTCTCTATGAAAAGCTGGAGGATATACAAAGCAGATTATGCCTGTGTGAGAAAGCCCTAGCAGAGTATCTAGACACCAAGAGGCTCAGCTTCCCTCGCTTTTACTTCCTGTCCTCCTCTGACCTGCTGGACATCCTTTCCAATGGCACAGCTCCACAACAGGTTCAACGGCACCTCTCCAAGCTCTTTGACAACATGGCTAAGATGCAGTTCCAGTTAGATGCCAGTCAGAACCCAACCAAGACGAGCCTTGGCATGTACAGCAAAGAGGAGGAATATGTGGCCTTCAGTGAGGCCTGTGACTGCAGTGGGCAGGTTGAAATATGGCTGAACCGTGTTCTTCGTCACATGAAAGCCACTGTGAGGCATGAGATGACAGAGGGGGTCACTGCCTATGAGGAAAAGCCCAGGGATCAGTGGCTGTTTGATTACCCGGCTCAGGTGGCTCTGACCTGCACTCAGATCTGGTGGACGACAGAGGTGGGCATTGCATTTGCCAGGCTGGAGGAAGGCTATGAGAGTGCCATGAAGGACTACTATAAGAAGCAAGTGGCCCAACTCAAAACCCTTATCACCATGCTAATTGGGCCGCTCTCCAAGGGGGACAGGCAAAAGATCATGACCATATGCACCATCGATGTGCATGCCCGGGATGTGGTAGCCAAGATGATTGCTCAAAAGGTTGACAATGCCCAGGCTTTCCTCTGGCTGTCACAGCTACGACATCGTTGGGATGATGAGGCCAAGCACTGCTTTGCTAACATCTGTGACGCCCAGTTTCTATATTCCTATGAGTACTTGGGAAATACACCTCGCCTAGTGATCACGCCTCTGACT------GATAGGTGCTACATCACTCTCACCCAGTCTCTACACTTGACCATGAGTGGGGCTCCAGCAGGACCTGCAGGCACAGGCAAGACAGAGACCACCAAGGACCTGGGCAGAGCACTCGGCATCATGGTCTATGTGTTTAACTGTTCTGAGCAGATGGACTACAAGTCCTGTGGCAACATCTACAAAGGCCTGGCTCAGACTGGTGCCTGGGGCTGTTTTGATGAGTTTAACCGAATCTCTGTGGAGGTCTTGTCGGTGGTGGCTGTCCAGGTAAAAAGCATCCAGGATGCAATCAGAGACAAGAAGCAGAGGTTCAGCTTCCTTGGAGAGGAGATTAGCCTTGACCCTTCAGTGGGCATCTTCATTACCATGAACCCAGGCTATGCTGGCCGCACAGAACTGCCAGAGAACCTCAAGGC
CCTTTTCAGGCCCTGTGCAATGGTAGTTCCAGACTTTGAGCTGATCTCTGAGATTATGCTGGTAGCAGAAGGATTCATTGAAGCCCGGTTGTTGGCCAGGAAGTTCATTACCCTTTACCGGCTGTGTAAAGAACTTCTCTCCAAACAGGATCACTATGACTGGGGACTTCGTGCCATTAAGTCTGTCCTCGTGGTAGCAGGATCACTGAAACGGGGAGACCCTGACCGCCCAGAGGACCAAGTCCTGATGCGTTCTTTGAGAGACTTCAACATCCCAAAGATCGTGACAGATGACATGCCGGTGTTCATGGGTCTGATAGGTGACCTCTTTCCTGCTTTGGATGTCCCCAGGAAGAGAGATCTGGACTTTGAGGCTGTGGTTCGGAAAGCAATCGTGGACCTTAAGCTCCAGGCTGAGGACAACTTTGTGCTCAAGGTGGTCCAGCTGGAGGAGCTGCTAGCTGTAAGGCACTCTGTG---TTCGTGGTGGGCGGTGCTGGT------ACCGGGAAGTCACAGGTACTGAGGTCTTTACACAAGACCTATCAG------ATCATGAGACGTCGCCCTGTGTGGACTGACCTCAACCCCAAAGCTGTCACAAATGATGAACTCTTT---------------------------------------GGCATCATCAATCCAGCCACTCGAGAATGGAAGGATGGA---CTGTTCTCTTCCATCATGAGAGAGCTTGCCATCATCTCTCATGATGGGCCCAAGTGGATCTTACTGGATGGCGATATAGACCCGATGTGGATAGAGTCTCTGAACACAGTCATGGATGATAACAAGGTACTGACCCTGGCAAGCAACGAGAGAATCCCCCTTAACCCCACAATGCGTCTTCTTTTCGAGATCAGCCACCTGCGCACAGCCACACCAGCAACCGTCTCCAGAGCAGGGATCCTGTACATAAACCCTGCAGACCTGGGATGGAACCCTCCAGTAAGCAGCTGGATTGATCAGAGAGAAGTCCAGACTGAGAGAGCCAACTTGACCATCCTGTTTGACAAATATCTTCCTACCTGCTTGGACACCCTCAGAACCAGATTTAAGAAAATAATTCCAGTCCCAGAGCAGAGTATGATCCAGATGCTGTGCTACCTCCTTGAGTGCCTCCTGACAAAGGAGGATATCCCTGCAGACTGCCCCAAGGAAATATATGAACTCTATTTTGTGTTTGCTGCCATCTGGGCATTTGGCAGTGCTGTGATCCAAGATCAGCTTGTAGACTACCGGGCAGAGTTCAGCAAATGGTGGCTGACTGAGTTTAAAACAGTCAAGTTTCCTTCCCAAGGAACTGTCTTTGACTACTACATAGACCCAGAGACCAAGAAATTTGAGCCCTGGGCCAAGCTCATCCCCCAGTTTGAATTTGACCCAGAGATGCCTTTGCAGGCTTGTTTGGTACACACAAGTGAGACCATCCGGGTGTGCTACTTCATGGAGCGGCTCATGCAATGGAGGCGGCCGGTTATGCTGGTTGGCCCTGCAGGCTCAGGCAAGTCTGTGCTGGTGGGAGCAAAGCTGTCCAGCCTTAACCCTGAGGAATACATGGTGAAAAATGTGCCCTTCAACTACTATACTACGTCAGCAATGCTGCAAGCTGTCTTGGAGAAACCTCTAGAAAAGAAAGCTGGCAGGAATTATGGCCCTCCAGGCAACAGGAAACTCATCTATTTCATCGATGACATGAATATGCCCGAGGTGGATGCCTATGGCACAGTACAGCCCCACACTGTCATCAGGCAGCACCTAGACTATGGCCACTGGTATGATCGGAACAAGCTGTCTCTGAAGGAGATCATGAATGTACAATACATCTCCTGTATGAACCCCACTGCAGGCAGCTTTACCATCAACCCAAGGCTTCAGCGCCACTTCAGCGTGTTTGCCCTCTGCTTCCCAGGAGCTGATGCCCTCTCTTCCATCTATAGCACCATCTTGACCCATCATCTGA
AGTTTGGAAACTTTCCCACCACCCTGCAGAAATCCATCCCTCCTCTGATAAACCTGGCTGTCACCTTCCATCAGAAAATTGCCACCACGTTTCTGCCCACAGCAATCAAATTTCACTACATCTTCAATCTCAGAGATTTTGCCAATATTTTCCAAGGCATTCTTTTCTCCTCCGTGGAATGTGTAAAGTCCACACAGGACCTAGTGAAACTCTATCTGCACGAGTCAAGTCGGGTTTATCGGGATAAGATGGTGGAAGAAAAGGATTTCAATCTTTTTGACAAAATCCAAACAGAATTCCTCAAGAAAAATTTTGATGATAGTGAAGAGGTGCTGAAGCAGACCCAGAACCTGAACATGTATTGTCACTTTGCAAATGGCATTGGTGAGCCCAAGTACATGCCTGTGCAATCATGGGACCTTCTGAATCAGACTCTGGTGGAAGCCCTGGAGAGCCACAATGAAGTGAATGCTGTGATGGACCTAGTTCTCTTTGAGGATGCCATACGTCACATCTGTCACATCAACCGAATCCTGGAGTCCCCTCGAGGAAATGCCCTGCTAGTTGGTGTAGGTGGGAGTGGTAAGCAGAGTCTGACAAAGCTGGCAGCTTTTATCAGTTCCATGGATGTATTCCAGATCACCCTTCGCAAAGGCTACCAAATCCCTGACTTCAAGGTGGACCTGGCCAGCCTCTGTCTGAAAGCTGGGGTAAAAAATCTCAGTACAGTGTTCCTTATGACTGATGCCCATGTGGCTGACGAGAGGTTCCTGGTGCTCATCAATGACCTCCTGGCATCTGGTGAGATCCCAGATCTCTACTCTGATGAGGAAGAGGAGAACATCATAAACAATGTGAGAAATGAGGTCAAAAGCCAGGGACTCATGGACAGCAGGGAGAACTGCTGGAAATTCTTCATAGAGAGAGTCCAGCGACAACTTAAGGTGACTCTCTGTTTCTCCCCTGTGGGGAACAAGCTGAGAATTCGAAGCAGGAAGTTCCCAGCCATTGTGAACTGTACTGCTATCAACTGGTTCCATGAGTGGCCTCAGGAGGCCCTAGAGTCTGTGAGCCTCCGATTCTTGCAGAATACAAAGAACATTGAGCCTGCGGTGAAGCAGTCAATTAGCAAGTTCATGGCCTTTGTCCACATAAGTGTCAACAAGACATCCCAGTCATACCTGACCAATGAGCAGCGATACAACTACACAACACCCAAGTCCTTTCTGGAGTTCATCAGACTGTACCAGAGCTTGCTGGAGAGAAATGGAAAAGAGCTCCAGGCCAAGGTGGAGAGGCTGGAGAACGGGCTGTTGAAACTGCACAGCACCTCGGCCCAGGTGGATGATCTGAAAGCGAAGCTTGCCACCCAGGAAGTGGAGCTGAGGCACAAGAATGAAGATACAGACAAGCTGATTCAGGTGGTGGGTGTGGAGACCAGCAAAGTGAGCAGAGAGAAAGCCATTGCTGACGAGGAGGAGCAGAAGGTGGCCCTGATCATGCTGGAGGTGCAGCAGAAACAGAAAGACTGTGAAGAGGACCTGGCTAAGGCCGAGCCAGCCCTGACCGCAGCGCAGGCGGCCCTCAACACTCTCAACAAGACCAACCTGACAGAGCTGAAGTCATTTGGTTCCCCACCTCTGGCTGTCAGCAATGTCAGCGCCGCGGTGATGGTTCTCATGGCCCCAGGGGGCAAGGTGCCCAAGGACCGCAGCTGGAAGGCTGCCAAAATCACCATGGCCAAGGTGGACAGCTTCCTGGATTCCCTAATCCACTTCGACAAGGAGAACATTCATGAGAATTGCCTCAAAGCCATCAGGCCATACCTGCAAGATCCTGCATTCAACCCAGAGTTTGTGGCCACCAAGTCCTATGCAGCTGCAGGCCTCTGCTCTTGGGTAATCAATATTGTGAGGTTCTATGAGGTCTTCTGTGATGTGGAACCAAAGCGCCAGGCTTTGAACAAAGCCACCTCAGACCTCACAACTGCCCAAGAGAAG
CTGGCAGCCATCAAAGCCAAGATCACACACCTTAATGAAAACCTGGCGAAGCTCACCACCAAGTTTGAGAAAGCAACAGCAGAGAAGCTCAAGTGTCAGCAAGAAGCTGAACTGACCGCAGGCACCATTTCGCTTGCAAACCGTCTGGTTGGAGGCCTTGCATCTGAGAACATAAGGTGGGCAGAGGCTGTGCAGAACTTCAGACAGCAGGAAAGGACGTTATGTGGCGACATTCTGCTTACTACAGCTTTCATCTCCTACTTGGGCTTCTTTACCAAAAAGTACCGAAAGAGCCTCATGGATGGGACCTGGAGACCCTATCTGAGCCAACTGAAAGTTCCCATTCCAACCACCCCAACTCTGGACCCCCTGAGGATGCTAACCGATGATGCTGAAGTGGCTGCCTGGCAGAATGAGGGTCTCCCTGCTGACCGCATGTCCATGGAGAATGCTACCATCCTCATCAACTGTGAGCGCTGGCCTCTCATGGTCGACCCTCAACTGCAAGGCATTAAATGGATCAAGAACAAATATGGAGAAGAACTCCGGGTCACCCAGATTGGCCAAAAGGGCTGCCTTCAAACCATAGAGCGAGCCCTGGAAGCTGGAGATGTGGTACTGATTGAGAACCTTGAGGAGTCCATTGATCCCGTCCTGGGACCTCTGCTTGGGAGAGAAGTCATTAAGAAAGGACGGTTTATCAAGATTGGAGACAAGGAGTGTGAATTCAATCCCAAGTTCCGGCTCATCCTTCATACCAAGCTGGCCAACCCTCACTACCAGCCTGAGCTGCAGGCTCAGGCTACCCTGATCAACTTCACGGTGACCAGGGATGGCCTGGAGGACCAGCTGCTGGCTGCTGTGGTCAGCATGGAGAGACCAGACCTGGAACAGCTGAAGTCCGATCTCACAAAGCAGCAGAACGGGTTCAAAATCACCCTCAAAACCTTAGAGGACAACCTGCTATCTCGCCTCTCTTCAGCCTCGGGGAACTTCCTGGGAGAAACAGCCTTGGTGGAGAACCTGGAGGTCACCAAGCAGACTGCTGCAGATGTGGAGGAAAAGGTCCAAGAAGCCAAATTGACAGAAGTAAAAATTAATGAGGCCCGAGAGCACTATAGGCCAGCAGCTGCCCGGGCATCTCTGCTCTACTTCATCATGAATGACCTCAGCAAGATCCATCCAATGTATCAGTTCTCCCTCAAGGCCTTCAGCATTGTCTTCCAGAAAGCTGTGGAG------AAGGCAGCTCCCAGTGAAAGTGTCACAGAGCGAGTGACTAATCTAATAGACAGCATAACTTTCTCAGTGTACCAGTATACCACACGTGGCCTCTTTGAGTGTGATAAGCTGACCTACCTAGCCCAGCTCACCTTTCAGATTCTCCTCGTGAACCAGGAAGTTAATGCAGCAGAGTTGGATTTCTTGCTTAGGGCTCCAGTACAGACAGGGACTCCCAGCCCAATGGAGTTCCTGTCCCACCAGGCCTGGGGAGGCATCAAGGCACTCTCATCAATGGAGGAATTCTGCAATCTGGACCGAGACATTGAAGGCTCTGCCAAGAGCTGGAAAAAGTTTGTGGAGTCAGAGTGTCCCGAGAAGGAGAAGTTTCCCCAGGAGTGGAAGAACAAGACAGCCCTGCAGCGCCTCTGCATGATGAGAGCCATGAGGCCTGACCGGATGACCTATGCCATGCGAGATTTTGTTGAGGAGAAGTTGGGAAGCAAATACGTGATGGGAAGAGCACTCGATTTTGTAACCTCATTTGAAGAGTCAGGACCAGCCACTCCCATGTTTTTCATCCTGTCTCCAGGGGTGGATCCACTGAAGGATGTGGAAAATCAAGGAAAGAAACTTGGATATACATTCAACAATCGGAACTTCCACAACGTGTCCCTAGGGCAAGGACAAGAGGTAGTTGCTGAGGCTGCACTGGACTTGGCTGCTAAGAAGGGTCACTGGGTGATTCTGCAGAACATCCACCTGGTGGCCAAGTG
GCTCAGTACCCTGGAGAAGAAACTGGAGGAGCTCAGCGAGGAAAGTCACCCAGACTTCAGGGTCTTCATCAGCGCAGAGCCTGCACCCTCCCCTGAGGGCCACATCATTCCCCAGGGCATTCTGGAAAACTCCATTAAGATCACCAATGAGCCTCCCACAGGCATGCACGCCAACTTACACAAAGCCCTGGACAACTTCACTCAGGACACTCTGGAGATGTGTTCCCGGGAGACAGAGTTCAAGACCATCCTCTTTGCTTTGTGCTACTTTCATGCGGTGGTTGCCGAGAGACGGAAGTTTGGGCCACAGGGCTGGAATCGGTCCTATCCATTTAACACTGGGGACCTCACCATCTCTGTGAATGTGCTATATAATTTCCTGGAGGCTAACACAAAGGTACCCTATGACGACTTGCGTTACCTGTTCGGTGAGATCATGTACGGTGGCCATATCACAGATGACTGGGACAGGAGACTCTGCAGAACCTATTTAGAGGAATTCATTCGGCCAGAGATGCTAGAAGGAGAGCTCTCCCTGGCCCCGGGGTTCCCACTCCCAGGAAACATGGACTACAGTGGCTATCACCAGTATATTGATGCTGAGCTGCCCCCTGAGTCTCCCTACCTATATGGCCTCCATCCAAACGCCGAGATTGGCTTCCTGACCCAGACATCGGAAAAACTCTTCCGAACCGTGCTGGAGATGCAGCCTCGGGACAGCCAGGCTGGAGATGGAGCTGGCATCACAAGGGAAGAAAAGGTCAAAACCTTTCTGGAAGAAATACTGGATCGGATGACAGATGAATTTAACATCGCGGAGCTAATGGCTAAGGTGGAGGAACGCACCCCCTACATTGTAGTTGCCCTCCAGGAGTGTGAACGCATGAACATCCTTACCAGAGAGATCCAGCGCTCACTAAGAGAGCTGCATCTTGGCTTACAGGGGGAGCTGACCATGACCAGTGAGATGGAAAACCTACAGAATGCCCTATATCTAGATGTGGTCCCAGAGTCCTGGGCCAGGCGAGCCTACCCTTCCACAGCAGGCCTGGCAGCCTGGTTTCTAGACCTGCTTAACAGAATCAAAGAGCTGGAGTCCTGGACAGGCGACTTCTTGATGCCCTCAACTGTGTGGCTGACGGGCTTCTTCAACCCCCAGTCCTTCCTGACTGCCATCATGCAGTCCATGGCCCGCAAGAATGAATGGCCACTAGACCAGATGGCCCTGCAGTGTGATGTGACAAAGAAGAACAGAGAGGAGTTCCGGAGCCCTCCTCGGGAAGGGGCCTACATCTATGGGCTCTTCATGGAAGGTGCCTGCTGGGACACACAGACTGGGATCATTGCAGAGGCGAAACTGAAGGACCTGACACCCCCCATGCCTGTGATGTTCCTCAAGGCCATTCCAGCAGATAAGCAAGATTGTCGAAGTGTCTATGCTTGTCCTGTGTACAAGACTTGTCAGCGGGGACCCACCTACGTGTGGACTTTCAATCTGAAGACTAAAGAAAACCCATCCAAGTGGGTTCTGGCTGGTGTTGCCTTGCTTCTCCAGATT",
"Opossum": "ATGCCAGTGAAAGTG------GCGGCGGTGTTCTCCGAGGCTGAGGATGGAGATAGCGAGAAAGGG---CCAGATCCTAGAATACGGCTCCTGGGCACCTACGTGATTCGGTGCTTGCGGCCAGCGGCTGGCGCCTGGAAGCGCTGTGTGAGCACGGCCGAATCTGAGCAGCTGCTCCAGGCCTTCCTGACAGGCTACTGCCCTCAAGAG---------CCG------ATGTTGGTGGTGCGTCCTGGGGGTGGGGGTCTGCAGCTGAGCTCCGGG------------CCATACCCCGATTTGTCTCGAGCCAAAGCGCTGTTCTTCCTGCGC---GGTTCCCCCAATCCCCCAGGACCCTTGGGTCCCAAAGGCGCAGTGCTCTGCGGGGAGCTGGCCCCGGCACCGTTGGAGCATCTGGCCACGCTGGTGGCTGAGGTAGTGATGCCTGTCCTGACCAATGAAAAGAATCACCAAGACTGGCCAAATGCTTTATATCAGGATGTAAGACGACACATCCACCACCTTCAAAGTGACCTCATTATCTTCCTGGGTCAAGTGACAGGAAAAACATTGCTGCCTCTTCCAGTAGGCTCAGAAAACATGGAATGTGCGGATTATGAAAATGAAAAAGACTTGGATTTCACAGATAAATCAATTGTCTATGCTATTGAGTCCACAGTGATCAAATGGAGCCACCAGATTCAAATAGTATTAAAGAGGGAATCTTCAGAACCACTCTTGCAGGGAGGAAATCCCACCCCAAAAGTGGAGCTGGAATTCTGGAAGAGAAGGTGTGAAGATCTGGAATATATTTATAATCAGCTGAGAGCCATAGAAGTGAGGGGCATAGCTGCACTTTTAAACAGACTTCAGAGCAGCTACTTCCCAGCCTTCAAAACCATGTTCAAAGATGTAATAGCAGCTTTAAGAGAGGCCCAGGATATCTATGTACATCTGAAGCCCCTGCAACGCCATGTGGAAAACATAGAGAATGTGGAATTTAGTGAAGTGAAGCCACTGGTGGGACCCCTGCTTCACGTAATCTTCTTGATTTGGTCCACATCTAAATACTACTGCTGTCCAGTACGGATCATTGTGCTACTGCAGGAAATATATAACCTTCTCATTCAGCAGGCCTGTACATATCTTAGCCCAGAAGATCTACTGAAAGGGGAGATAGAAGAGAGTCAGAGGAAAATACAGGTGGTGATGGACATTCTTAACTTTTCCAAAGAGATGTTTGAGAATAGGAGGAACAATCTCAAGACTTACTTCAAACAGGTCCGGGAGGTGAAGGAATGGGATTTCAATTCTTCTATGATCTTTGTGCGACTAAATAACTTCCTCAAAAGACTCAAGATGGTAGAGAATCTTCTGAAGACAACCTTGGATTTTCTCAAGCTAGAGAAACTTGAATTCAGTGGAATTAGAGGGAATGCTTTGAGTCAACAAGTCCAGGGCATGTATGAAGAATTCCAGAATGTGTACAAAGCCTTTTCAGAATGCTTCTATGACTGTCTGGACCCTAAACACACAGAATTCGAAAATTATGTTGCTGAATTCAACCACAAAGTAGAAGATTTGGACCGAAGATTAGGGATGGTCTTCCTCCAAGCTTTTAATGATGTATCTGGCTTAGAGCATGCTTTTAAGTTGCTTGAAATATTTGGGAGTCTGCTTGAAAGACCAGTAGTAGCCATGGATGTATTTGATAAATACCCAAGACTGATCACAATGTTCAGCAGTGACTTGGATGCTGTTAAGACAATCTATTGTCAGCATGTCCAGGAGGAGTCAGAACTTGGGTTTTCCCAATTACACAAGAACATGCCGGCAGTAGCTGGGGGTCTCCGCTGGGCTCAGGAACTCAGAGAACGAATTGAAATTTCATTCAATAACCTAAGACATATCAATCACCCCTATATGGAGTCCACTGAAAGTAAAGAGATGTTCCAAAAGTATGGTGAAATGTTTACATTGTTAGA
AAAGTATGAGACAAAACTTTATGATAGCTGGTGCCAGACAGTGTCAGAGAAGTCACAATACAATCTCACTCAGCCACTCCTCTGTCATGATCCAGAAACTAAGCAGATCATTGTCAACTTTAACCCACAG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAAAC---ATGAAA------------------------------------------------------------------------------------------------------------------------------------------------------CTCTTCCTTGCAGATCCAGCATCCAGTATGTGGAAGGCATACATTGACTACCTTGATAACATGGTTCTTCATGGATTCTTCATTGCTATTGAATCTTCTCTAAATTATCTCCTGGAAAATACTGATTCTAAGAACAGACTTCCTCCCTTTTTTGAAACACAATTGGATCTAGTTATACCAGAACTGATCTTTTGTCCCTCTCTGGACCCTTTTGCTAATGGTGGTTTTCAAAGCATTGTGGAGGGACTTATCAATGACATTTTCAAAATATCTTCTATGGTCCCACGACTT---GCACAGAAAAGTTCCCCTCATTATCAGGCTGACATGGAGGACACAGAAGATCTGTCAAACCTGAGGAATATATTAATGGAGAGAGTCAAGAATATGATGACCATCTGCTGTGACTATCGGAACTCCTTTGACCATTACTCTTTCCTTTACATGGATGACCGAAAGGAATTTATGCGACAATTTCTTTTATATGGACATATGCTCACACCAGAGGATTTAGAAGCCCATGGAGAAGATGAGATCCCTGAAAATCCTCCCACTCTTCAGCAATTTAAATTGCAGATTGATTCCTATGAAAAGATTTATGAAGAGGTGAACCAGCTAGAGCCTTTCAGAATTTTTGACAACTGGATGAAAATTGATATTCGTCCATTTAAGATGACTCTACTAAATGTAATTAAGAGATGGAGCTTCATGCTTAAACAGTATCTCATTGATCATGTCACTCACAGTTTGGCAGACCTTGAAGTCTTTATAAGAAATGGTGAGAGTGGTTTAAACAAGAAGATAGAAAAAGGTGATTTTGCTGGTTTGGTTGAAATTATGAGAACTCTAATGGCTCTTAAAGAACGACAGAGCAGCACTGATGAGATGTTTGAGCCACTGAAACACACAATTGATTTACTGAAGATCTATGAACAAGAATTACCTGATTCAGTATTTAAACAGCTAGAGGAATTGCCAGAAAAATGGAGCAACATAAAGAAGATGGCAGTCATTGTAAAACAGCATGTAGCTCCCCTGCAGGAAGATGAAGTAACAGCTCTCCGTCAAAAATGTGCCATTTTTGCTATTGAACAGAATATATTCCAAGATCAGTTCCACAAAGAAGCTCCTTTCAGGTATGATAGCATTAAGCCTCACCAAGTCTTGGATGCCAAAAACATTCAAATCCAACAGATGGAATCTTTCATGTCCTCCATTTCAGACTCTACAAATTTGTTTGAAGTCAACATTCCTGATTACAAACATTTAAAACAATGCAGGAAGGAGATCTGCTTGTTGAAGGAGCTTTGGGATATGATTGACCTAGTGAATTTTAGCATCAATAACTGGAAGGTAACCA
AATGGAGGAATATTAATGTAGAGAATATGGACTTAGAATGTAAAAGTTTTGCCAAACAAATATGGAAACTTGGCAAGGATGTGAGAGCCTGGGATGCTTTCATGGGGCTGGACAACACTCTGAAGAACATCCTAACATCCTTACGGGCTGTGTCTGAACTTCAAAATCCAGCCATAAGAGAGAGGCACTGGAACCAGCTGATGCAGGCTACAGGTATGAAATTCATCATGAATGATGATACCACCCTTGAAGACTTACTTAAGCTTGAGCTACACAACTTTGAAGAGGCGATCCAGGGCATTGTAGACAAAGCTGTGAAAGAGATGGACATGGAAAAAGTTCTAAAGGAACTAAAAGCCACTTGGGCAGGGATGGAATTTCATTATGAGCCCCACCCCCGGACAATGGTCCCACTGATGAGGTCTGATGAAGACCTTATTGAAACTCTTGAAGATAACCAAGTCCAGCTGCAGAATCTAATGACATCCAAGTACATAGCCTTCTTCCTGGAGGAAGTATCTGACTGGCAGAAGAAGCTCTCAATAGCTGATGCTGTCATTTCAATCTGGTTTGAAGTACAGCGTACATGGTCTCATCTTGAAAGCATTTTCATTGGATCTGAAGATATACGAGCACAGCTTCCTAAGGACTCTAAACATTTTGAAGACATTGATACTGATTTTAAAGAGTTAGTTTGTGATGCTCAGAAGACACCAAATGTGGTAGAAGCTACCAACAAACCAGGTGTTCATGAACAATTAGAAGATATTCAGAACAGGTTGAGCCAGTGTGAGAAAGCCTTGACTGAATATCTAGATACCAAGAGACTGGTCTTCCCCAGGTTTTATTTCCTCTCCTCTTCCGACCTTTTAGACATCCTTTCCAACGGCACAAATCCACAACAAGTTCAACGCCATCTTTCTAAGTTATTTGACAGTATGGCTAGGATGAAGTTCCAGGTGGACTCCAGTCAAAAACCAACCAAGAAAAGCCTCGGCATGTATAGCAAAGAGGAAGAGTATGTGAATTTCAGTGAGCCTTGTGACTGTAGTGGCCAGGTTGAACTCTGGTTGAATAATGTGCTCAATCACATGAGGGCCACCGTGAGACATGAAATGACAGAAGGTGTCACTGCCTATGAAGAGAAACCTAGGGAACAGTGGCTCTTTGACTACCCTGCTCAGGTGGCCTTGACATGTACCCAGATCTGGTGGACCACTGAGGTGGGAATTTCCTTTGCCAGGCTGGAAGAAGGATATGAAAGTGCCATGAAGGACTATTATAAGAAACAAGTTACCCAACTGAACACCCTGATCACCATGCTAATTGGCCAGCTCTCCATGGGTGACCGTCAAAAAATCATGACCATCTGTACTATTGATGTGCATGCTCGAGATGTAGTTGCCAAGATGATCTCTCAGAAGGTAGAGAACACTCAGGCTTTTCTCTGGATGTCCCAGATGCGCCATCGGTGGGATGATGAGAAAAAGCACTGCTTTGCCAATATTTGTGATGCTGAATTCCTGTATTCCTATGAGTACCTGGGTAACACACCTCGCCTGGTGATTACCCCTCTGACA------GACAGATGTTACATTACCCTCACCCAGTCTTTGCATTTGACCATGAGTGGGGCTCCAGCAGGACCTGCAGGCACTGGCAAAACTGAGACTACCAAAGACCTGGGTCGAGCATTGGGTATCATGGTATATGTGTTTAACTGTTCTGAGCAAATGGACTACAAGTCTTGTGGCAATATCTACAAAGGCCTTTCTCAGACTGGTGCCTGGGGCTGTTTTGATGAATTTAACAGAATTTCTGTGGAGGTTCTTTCCGTGGTGGCAGTACAGGTGAAAAGTATTCAAGATGCAATCAGAGATAAGAAGCTGAATTTCAATTTCCTTGGAGAGGAGATTAAATTGAATCCCTCAGTGGGAATCTTCATCACTATGAACCCAGGATATGCTGGCCGTACAGAGCTTCCAGAGAACCTTAAG
GCTCTCTTCAGGCCTTGTGCTATGGTGGTACCAGATTTTGAATTGATCTGTGAAATCATGTTGGTTGCTGAAGGATTCATCGAGGCACAATCATTGGCCAAAAAATTCATTACCCTTTATCAGCTTTGCAAAGAGCTCCTCTCCAAACAGGACCATTATGACTGGGGTCTGCGAGCTATCAAGTCTGTGCTTGTGGTAGCAGGATCTCTGAAGAGAGGAGACCCTGACCGACCTGAAGACCAAGTCCTAATGCGTTCCCTCAGAGACTTCAATATTCCCAAAATCACAACTGATGACATGCCTGTGTTTATAGGCCTAATTGGAGACCTTTTCCCTGCCCTTGACATCCCCAGAAAGAGAGACCTTCACTTTGAAAGTTTTGTAAAACAGGCAATATTAGAGCTAAAACTTCAGGCTGAGGACAACTTTATGCTCAAAGTGGTACAGCTAGAAGAACTTTTGGCTGTGCGACACTCTGTG---TTTGTGGTGGGGAATGCTGGT------ACAGGAAAATCCCAGGTACTAAGATCTTTGCAAAAGACATACCAG------AACATGAAACGACGACCAGTCTGGACTGATCTCAACCCCAAAGCTGTCACTAATGATGAACTATTT---------------------------------------GGCATCATCAACCCAGCCACAAGAGAATGGAAAGATGGA---CTTTTCTCATCAATCTTGCGTGAACTTGCCAATATAATACATGATGGGCCCAAGTGGATTTTACTAGATGGCGATATTGATCCAATGTGGATTGAATCTTTGAACACTGTCATGGATGATAACAAGGTTCTGACTCTGGCAAGCAATGAAAGAATCTCTCTCAATCCAACTATGCGGCTCCTGTTTGAGATCAACCACTTGCACACAGCTACCCCTGCCACAGTCTCCAGAGCAGGAATACTATACATCAATCCTGCAGACCTGGGTTGGAATCCACCAGTGAGTAGTTGGATTGACAAGAGAGAGATACAGTCTGAACGAGCTAACCTGACCATCCTATTTGACAAATATTTGCCACCTTGCCTGGATGTAGTCAAAACAAGATTTAAAAAGATTGTTCCAATACCAGAGCAGAGCATGATTCAAATGCTGTGTTACATTCTTGAGTGTCTTCTAACAAAGGAAAACAGTCCCCCAGACTCTCCCAAGGAACTTCATGAACTTTATTTTGTGTTTGCTTCTATCTGGGCTTTTGGTGGAGTATTGATCCAAGACCAGCTTGTGGATTACAGAGCAGAGTTCAGTAAATGGTGGATAACTGAATTCAAGACAATCAAGTTTCCTTCCCAAGGAACAATCTTTGACTTTTACATAGAACCAGAAACAAAGAAATTTGAGCCGTGGTCCAAACTTATCCCCAAATTTGAATTTGACCCAGACCTACCTTTGCAGACTTGCCTGGTACACACTGTTGAGACCATTCGTGTGTGCTATTTCATGGAGCAGCTCCTGAAACACCGGAGACCTATCATGTTGGTGGGGAATGCAGGCACTGGGAAATCTGTTCTGGTAGGGGCAAAATTGGCTACCCTGGATGCAGATGAATACATGGTGAAGAATATCCCATTTAATTATTATACTACATCTGCAATGCTACAAGCTGTCCTAGAGAAACCTCTAGAAAAGAAAGCTGGAAGAAATTATGGCCCACCTGGCACCAAGAAACTCATCTATTTCATTGATGATATGAACATGCCTGAGGTAGATGCATATGGAACAGTGCAACCCCACACACTCATCAGACAGCATATGGACTATGGGCACTGGTATGACAGAAATAAACTGTTCCTCAAAGAAATCATGAATGTACAATATGTGGCCTGTATGAATCCGACAGCTGGTGGCTTCTCTATCAATCCTCGGCTACAGCGTCATTTTAGCATCTTTGTGCTCTCCTTCCCTGGAGTAGATGCATTGTATTCAATCTATAGCACCATCTTGACTCAGCATCT
AAAACTTGGTAACTTTCCAGCATCACTGCTGAATTCCACCCCCCGACTCATTAACTTGGCCATTACCTTCCATCAGAAGATTGCTGCCACTTTTCTCCCCACAGCAATAAAATTCCATTATATCTTCAATCTCCGAGATTTCTCCAACATTTTCCAAGGCATTCTCTTTTCAACAGTGGAAAGTGTTAAAACCACATCAGACCTTGTGAAGCTCTATCTTCATGAGTCCAATCGGGTTTATCGCGATAAGATGGTTGAAGACAATGACTTTGACAACTTTGATAAAATCCAAATTGAAGTGGTAAATAATTTCTTTGATGATATGGACAAGACTCTAGAGGAAATCAAGAGATTGAATATGTACTGCCACTTTGCAAATGGTATTGGTGAGCCCAAATATATGCCAGTGAAGACATGGGAACTGCTTACCCAAATCCTGGTGGAAGCCTTAGAGAACCACAATGAAGTCAATCAAGTGATGAATCTGGTTCTTTTTGAGGATGCCATGTGCCATGTTTGTCGTATCAATCGCATCCTAGAATCCCCAAGGGGGAATGCTTTGCTGGTTGGAGTAGGTGGAAGTGGCAAACAGAGCCTGACAAGACTTGCAGCTTTTATCAGCTCCATGGATGTTTTCCAGATCACTCTAAGAAAAGGTTATTGTATTTCTGATCTTAAGATGGATTTAGCCAATCAGTGCCTAAAAGCTGGAGTAAAGAATGTAAGCACTGTATTTCTCATGACAGATGCCCAAGTTGCTGATGAAAAGTTCCTTGTCCTTATCAATGATCTGTTGGCATCTGGAGAGATTCCAGATCTATACTCTGATGATGAAGTTGAAAACATCATAAACAATATGAGAAATGAAGTCAAGAGCCTGGGTTTGTTTGACAGCAGGGAAACTTGCTGGAAAGTCTTCATAGAAAGGGTCCAAAAACAATTAAAGGTTATACTCTGCTTCTCCCCTGTGGGGAATAAGCTAAGAGTCCGCAGCAGGAAATTTCCAGCCATTGTGAACTGCACAGCCATTGATTGGTTTCATGAGTGGCCTCAGCAAGCACTAGAATCAGTGAGCCTGCGCTTCTTACAAAACATAGAGAATATTGATCCAGCAGTAAAAGAGTCAATTAGTAAATTCATGGCTTATGTACACACGAGTGTCAACCAAATGTCCCAGTCCTACCTGAGCAATGAACGGCGCTATAATTACACCACCCCAAAATCCTTCCTGGAACAGATCAGACTCTATCAGAACCTACTGGTCAAGAATGGCAATGAGCTGACATCTAAAATGAAGAGGCTTAAGAATGGACTGCAGAAGCTTCACAGTACATCTTCCCAGGTAGATGACCTGAAAGCTAAACTGGCACTCCAGGAAATAGAGCTCAGGCAGAAGAATGAAGATGCAGACAAACTAATTCAAGTGGTTGGAGTAGAGACAGAGAAAGTAAGCAAAGAGAAAGCCATTGCTGATGAAGAGGAACATAAAGTGGCTCTAATCATGTTGGAGGTCAAGCAGAAGCAAAAGGACTGTGAGGAAGATCTGGCCAAAGCAGAGCCATCTCTCACAGCAGCCCAAGAAGCTCTCAACACACTCAATAAGACCAACCTAACAGAGCTCAAGTCTTTTGGTTCACCACCTTTGGCTGTCAGCAATGTCACTGCTGCAGTAATGGTTCTCATGGCCCCTGGAGGGAAGGTACCCAGAGATCGAAGTTGGAAAGCTGCCAAGGCTACCATGGCCCGAGTTGATGGCTTTCTGGACGCCCTGGTCAACTTTAACAAAGAGAACATACCTGAGAGCTGCCTCAAAGCTATCCAACCATATATTCAAGATCCAGAATTTAAACCTGAGTTTGTGGCCTCTAAGTCTTTTGCAGCAGCTGGCCTCTGTTCCTGGGTCATAAATATTGTGAGGTTTTATGAGGTTTTCTGTGATGTGGAACCCAAGCGGCAAGCCCTGAGTAAAGCAAGTTTGGATCTTGCTATTGCCCAAGAAA
AATTGGCAACCATTAAAATCAAGATTGCTCACCTTAATGAAAACTTGGCAAAACTTACAACCAAATTTGAGAAAGCAACTGCAGAAAAACTCAAATGTCAGCAAGAAGCTGAACTGACCACAGGTACCATCTCACTTGCAAATCGCCTGGTTGGAGGTCTTGCCTCTGAAAATGTAAGATGGGCAGAAGCTATTAAGGACTTCAGACAGCAAGAGAATACATTGTGTGGAGATATTTTACTGATTACAGCTTTCATTTCTTACCTGGGATACTTTACCAAGAAATATCGTCAAAATCTTATGGATTGTAGCTGGAGACCTTACCTGAATCAGTTAAAAGTGCCTTTTCCAGTCACCCCTACTTTAGATCCTCTAAAGATGCTTACTGATGATGTAGACATAGCCACCTGGCAAAATGAGGGTCTTCCTGCTGACCGTGTGTCCACAGAGAATGCTACAATCCTTATCAACAGTGAACGATGGCCACTTATAGTTGACCCTCAACTACAAGGAGTCAAATGGATTAAGAAGAAATATGGCAAAGACCTTCGAGTCATCCAGATTGGAGAGAAAGGATACCTTGATATCATAGAACATGCCCTAGCAGATGGTGATGTAGTGCTGATTGAAAACATAGGAGAGACAGTGGACCCTGTTCTAGGACCCTTGTTGGGAAGAGAAGTAATTAAAAAAGGAAGATTCATTAAAATTGGAGACAAGGAGTGTGAATATAATCCCAGGTTCCGCCTCATTCTTCACACCAAGCTTGCCAATCCTCACTATCAGCCTGAGCTCCAAGCCCAGGCAACGCTTATCAACTTCACTGTGACTAGAGATGGCTTAGAGGATCAGCTTCTGGCAGCAGTGGTCAACATGGAGAGGCCAGACTTAGAAGAACTGAGATCAGATCTGACAAAGCAACAGAATGCATTCAAGATCACACTGAAAACCTTGGAAGACAACTTGCTGTCTTGCCTCTCCTCCGCATCTGGGAACTTCCTGGGAGATATGGCTTTAGTAGAAAACTTAGAGATCACCAAAAAGACAGCTACAGAAATTCAGAAGAAGGTCCAGGAAGCCAAGATAACAGAAATAAAAATTAATGATGCCAGAGAGCATTACCGGCCAGTGGCAGCACGTGCGTCTTTGCTGTACTTCATCATGAATGACCTCAGTAAAATCCATCCCATGTATCAGTTTTCTCTCAAGGCTTTCAGCCTTGTCTTCCAAAAGGCTATACAGAAGACCAAGGCAGATGCTGATGAGAACCTCCAACAGAGAGTAGTCAGTTTGATTGACAACATCACCTTCTCTGTATATCAGTATACTACTAGGGGACTTTTTGAATGTGATAAGCTGACTTACATCACCCAGGTCACCTTTCAGACACTCATGATGAATCAGGAAATCAATGCTGCTGAATTGGACTTCCTTCTCCGGTACCCAGCACAGTCCAATGTTCTGAGCCCTGTGGATTTCCTTTCCAACCAATCCTGGGGAGGTATCAAGACTCTTTCATTAATGGAAGAATTTTGTAATCTGGATCGAGACATTGAAGGGTCTGCCAAACATTGGAAAAAATTTGTGGACTCAGAGTGTCCTGAAAAGGAGAAATTCCCCCAAGCATGGAAAAACAAGTCATCTCTACAAAAACTGTGTATGATGAGGGCAATGAGACCTGACCGAATGACATATGCTATGAGAGATTTTGTGGAAGAAAAGCTAGGAAGCAAATATGTGGTGGGCAGAGCATTAGATCTCTCTACCTCTTTAGAAGAATCAGGGTCTGCAACTCCCATGTTCTTCATACTGTCGCCAGGTGTTGACCCACTGAAAGATGTGGAGAAGGAAGGGAAGAAACTTGGTTATACCTTCAACAACCAGAACTTCCATAATGTGTCCTTGGGTCAAGGACAAGAGGTAGTAGCAGAGGCTGCATTGGATCTGGCTGCCAAGAATGGCCACTGGGTTATCTTACAGAACATCCATCTGGTAGCCAAA
TGGCTTGGATCCCTTGAGAAGAAGTTGGAACAACATAGCAAGAGCAGTCATCATGAGTTCAGAGTCTTCATGAGTGCTGAGCCTGCAGCTTCTCCTGATGGTCACATCATTCCCCAAGGCATCCTAGAAAATTCAATAAAGATCACTAATGAGCCTCCCATGGGCATGCATGCCAAACTGCACAAGGCCCTGGACAACTTCACTCAGAATACTCTGGAAATGTGTACCCGAGAAACAGAGTTTAAGAGTATTTTATTTGCACTTTGTTACTTCCACGCTGTTGTAGCAGAAAGGCACAAATTTGGACCTCAAGGCTGGAATCGCACTTACCCTTTTAACACTGGAGACCTTACCATCTCAGTGAATGTGCTCTATAACTTTCTTGAGGCCAATGCCAAGGTACCCTATGATGACTTGCGCTATCTCTTTGGTGAGATCATGTATGGAGGTCACATCACAGATAACTGGGACAGGAGGCTTTGTAGAACATACCTGGAGGAATTCATTAAGCCAGAAATGTTAGAAGGAGAAATGTTTCTGGCTCCAGGGTTCCCGATGCCAGGCAATATGGATTACAACAGTTATCACCAGTTCATTGATGACATGCTGCCAACAGAGTCACCATATTTGTATGGGCTTCACCCCAATGCTGAAATTGGCTTTCTGACCCACACCGCAGAAAAGCTCTTCCATACAGTGCTAGAAATGCAGCCTCGGGACAGCCAAGATGGAGATGGAGGAGGGATCACAAGAGAAGAAAAGGTAAAAGCCTTTCTGGATGAAATATTAGAGAAGATAACTGAAGAGTTTAACATTGCAGAGTTGATGGCTAGGGTAGAAGAACGTACCCCCTACGTTGTGGTTACCTTTCAAGAATGTGAAAGAATGAACCTCCTTATCAGAGAAATACAGCACTCGTTGAAAGAACTGGATCTGAGTCTGAAGGGTGAGCTAACAATGACCAGCAACATGGAGAGCTTACAGAGAGCTCTGTACCTAGATACTGTACCAGCGTCGTGGGCCAAGAAAGCATACCCATCAACAGCAGGCCTGGCAAGCTGGATTGTGGATCTGCTCACCCGAATTAAAGAGCTGGAAAGATGGATGGGAGACTTTGCATTACCCTCTGCTGTCTGGCTAGCAGGATTTTTTAACCCCCAATCATTCCTAACGGCCATCATGCAGTCCATGGCTCATAAGAATAAGTGGCCACTGGATAAGATAGCCCTGCAGTGTGAAGTGACTAAGAAGAATCGGGAAGATTTCAGTAGCCCACCTCGGGAAGGGGCCTATATCTATGGTCTGTTCATGGAAGGGGCCTGTTGGGATACACAGGCTGGAAATATCACAGAGGCTAGACTGAAGGATCTGACTCCACTCATGCCTGTGATATTCATCAAAGCTGTCCCTGTGGACAAGCAAGACAATCGAAATATCTATCCTTGTCCAGTATACAAGACTTGTCAGCGGGGACCCACTTATATTTGGACATTTGGTCTGAAAACTAAAGAAGCTCCATCCAAATGGGTATTAGCTGGTGTGGCCTTGCTTTTGCAGATT",
}
class MakeCachedObjects:
    """Simulate one alignment and lazily fit, then cache, models against it.

    Every fitted likelihood function is stored in ``self.results`` under a
    short key. Calling the instance with a key fits the matching model the
    first time and simply returns the stored object afterwards.
    """

    def __init__(self, model, tree, seq_length, opt_args):
        """simulates an alignment of seq_length positions from model on tree

        opt_args are the default optimiser arguments used by the fit_*
        methods; show_progress is always forced to False.
        """
        self.tree = tree
        self.opt_args = {**opt_args, "show_progress": False}
        sim_lf = model.make_likelihood_function(tree)
        sim_lf.set_motif_probs({"A": 0.1, "C": 0.2, "G": 0.3, "T": 0.4})
        self.lf = sim_lf
        self.aln = sim_lf.simulate_alignment(seq_length)
        # the alignment shares the cache so _make_likelihood can find it
        self.results = {"aln": self.aln}
        # the discrete model is fitted on a tree built from tip names only
        self.discrete_tree = make_tree(tip_names=self.aln.names)

    def fit_general(self, **kwargs):
        """fits General and caches it as 'general'

        kwargs are accepted but not applied to the optimiser arguments.
        """
        if "general" in self.results:
            return
        fitted = _make_likelihood(General(DNA.alphabet), self.tree, self.results)
        fitted.optimise(**self.opt_args)
        self.results["general"] = fitted

    def fit_gen_stat(self, **kwargs):
        """fits GeneralStationary and caches it as 'gen_stat'

        kwargs are accepted but not applied to the optimiser arguments.
        """
        if "gen_stat" in self.results:
            return
        fitted = _make_likelihood(
            GeneralStationary(DNA.alphabet), self.tree, self.results
        )
        fitted.optimise(**self.opt_args)
        self.results["gen_stat"] = fitted

    def fit_constructed_gen(self, **kwargs):
        """fits a predicate-built NonReversibleNucleotide, cached as 'constructed_gen'

        kwargs override / extend the default optimiser arguments.
        """
        if "constructed_gen" in self.results:
            return
        settings = {**self.opt_args, **kwargs}
        # every directed nucleotide change except G>A, one predicate each
        directed_changes = "AC AG AT CA CG CT GC GT TA TC TG".split()
        preds = [
            MotifChange(a, b, forward_only=True).aliased(f"{a}/{b}")
            for a, b in directed_changes
        ]
        model = NonReversibleNucleotide(predicates=preds)
        fitted = _make_likelihood(model, self.tree, self.results)
        fitted.optimise(**settings)
        self.results["constructed_gen"] = fitted

    def fit_discrete(self, **kwargs):
        """fits DiscreteSubstitutionModel on the discrete tree, cached as 'discrete'

        kwargs override / extend the default optimiser arguments.
        """
        if "discrete" in self.results:
            return
        settings = {**self.opt_args, **kwargs}
        model = DiscreteSubstitutionModel(DNA.alphabet)
        fitted = _make_likelihood(
            model, self.discrete_tree, self.results, is_discrete=True
        )
        fitted.optimise(**settings)
        self.results["discrete"] = fitted

    def __call__(self, obj_name, **kwargs):
        """returns the cached object called obj_name, fitting it first if absent

        An obj_name that is neither cached nor one of the known model keys
        raises KeyError.
        """
        if obj_name in self.results:
            return self.results[obj_name]
        fitters = {
            "general": self.fit_general,
            "gen_stat": self.fit_gen_stat,
            "discrete": self.fit_discrete,
            "constructed_gen": self.fit_constructed_gen,
        }
        fitters[obj_name](results=self.results, **kwargs)
        return self.results[obj_name]
class NonStatMarkov(TestCase):
    """test discrete and general markov

    Fitted likelihood functions are shared between tests through the
    class-level ``make_cached`` object, which fits each model at most once
    and returns the stored result on later requests.
    """

    tree = make_tree(treestring="(a:0.4,b:0.4,c:0.6)")
    opt_args = dict(max_restarts=1, local=True, show_progress=False)
    # built once at class creation: simulates a 100000 position alignment on tree
    make_cached = MakeCachedObjects(TimeReversibleNucleotide(), tree, 100000, opt_args)

    def _setup_discrete_from_general(self, gen_lf):
        """returns a discrete likelihood function initialised from gen_lf

        The psub of every edge and the motif probs are copied from gen_lf,
        so both likelihood functions describe the same process.
        """
        discrete_tree = self.make_cached.discrete_tree
        dis_lf = _make_likelihood(
            DiscreteSubstitutionModel(DNA.alphabet),
            discrete_tree,
            dict(aln=self.make_cached.aln),
            is_discrete=True,
        )
        for edge in self.tree:
            init = gen_lf.get_psub_for_edge(edge.name)
            dis_lf.set_param_rule("psubs", edge=edge.name, init=init)
        dis_lf.set_motif_probs(gen_lf.get_motif_probs())
        return dis_lf

    def test_discrete_vs_general1(self):
        """compares fully general models"""
        gen_lf = self.make_cached("general", max_evaluations=2)
        gen_lnL = gen_lf.get_log_likelihood()
        dis_lf = self._setup_discrete_from_general(gen_lf)
        assert_allclose(gen_lnL, dis_lf.get_log_likelihood())

    def test_paralinear_consistent_discrete_continuous(self):
        """paralinear measure should be consistent between the two classes"""
        gen_lf = self.make_cached("general", max_evaluations=2)
        dis_lf = self._setup_discrete_from_general(gen_lf)
        ct_para = gen_lf.get_paralinear_metric()
        dt_para = dis_lf.get_paralinear_metric()
        # compare values in a fixed key order
        keys = sorted(ct_para)
        assert_allclose([ct_para[k] for k in keys], [dt_para[k] for k in keys])

    def test_general_vs_constructed_general(self):
        """a constructed general lnL should be close to that of General"""
        sm_lf = self.make_cached("constructed_gen", max_evaluations=25)
        sm_lnL = sm_lf.get_log_likelihood()
        gen_lf = self.make_cached("general", max_evaluations=0)
        # evaluate General at the parameter values of the constructed model
        rules = sm_lf.get_param_rules()
        gen_lf.apply_param_rules(rules)
        gen_lnL = gen_lf.get_log_likelihood()
        assert_allclose(sm_lnL, gen_lnL, rtol=0.1)

    def test_general_stationary(self):
        """General stationary lnL should be less than that of General"""
        gen_stat_lf = self.make_cached("gen_stat", max_evaluations=25)
        gen_lf = self.make_cached("general", max_evaluations=25)
        gen_stat_lnL = gen_stat_lf.get_log_likelihood()
        gen_lnL = gen_lf.get_log_likelihood()
        self.assertLess(gen_stat_lnL, gen_lnL)

    def test_general_stationary_param_list(self):
        """general stationary returns parameter list"""
        gs = GeneralStationary(DNA.alphabet)
        params = gs.get_param_list()
        self.assertNotEqual(params, [])

    def test_general_stationary_is_stationary(self):
        """should be stationary"""
        gen_stat_lf = self.make_cached("gen_stat")
        mprobs = gen_stat_lf.get_motif_probs()
        mprobs = array([mprobs[nuc] for nuc in DNA.alphabet])
        for edge in self.tree:
            psub = gen_stat_lf.get_psub_for_edge(edge.name)
            # stationary: the motif probs are unchanged by the edge's psub
            pi = dot(mprobs, psub.array)
            assert_allclose(mprobs, pi)

    def test_general_is_not_stationary(self):
        """should not be stationary"""
        gen_lf = self.make_cached("general", max_evaluations=5)
        mprobs = gen_lf.get_motif_probs()
        mprobs = array([mprobs[nuc] for nuc in DNA.alphabet])
        for edge in self.tree:
            psub = gen_lf.get_psub_for_edge(edge.name)
            pi = dot(mprobs, psub.array)
            # the closeness check must fail on every edge, otherwise the
            # model is behaving as stationary and this test fails
            with self.assertRaises(AssertionError):
                assert_allclose(mprobs, pi)

    def test_strand_symmetric(self):
        """StrandSymmetric should fit a strand symmetric model"""
        taxa = "Human", "Mouse", "Opossum"
        order = "ACGT"
        # permutation that swaps A<->T and C<->G for the ACGT ordering
        S = array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]])
        P = empty((4, 4))
        # scope the filter to this test so the global warning state is restored
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "Model not reversible", UserWarning)
            aln = make_aligned_seqs(data=_aln, moltype=DNA)
            aln = aln[2::3].no_degenerates()
            tree = make_tree(tip_names=taxa)
            model = StrandSymmetric(optimise_motif_probs=True)
            lf = model.make_likelihood_function(tree)
            lf.set_alignment(aln)
            for param, val in [
                ("(A>G | T>C)", 2.454),
                ("(A>T | T>A)", 1.5783),
                ("(C>G | G>C)", 0.6687),
                ("(C>T | G>A)", 6.7026),
                ("(G>T | C>A)", 0.9219),
            ]:
                lf.set_param_rule(param, init=val)

            for edge in taxa:
                Psub = lf.get_psub_for_edge(edge)
                for i, from_base in enumerate(order):
                    for j, to_base in enumerate(order):
                        P[i, j] = Psub[from_base][to_base]
                # strand symmetry: P is unchanged by complementing both axes
                numpy.testing.assert_almost_equal(P, S.dot(P).dot(S))

    def test_nsGN(self):
        """StrandSymmetric can be constructed with these keyword args"""
        kw = {
            "model_gaps": False,
            "name": "StrandSymmetric",
            "optimise_motif_probs": True,
            "recode_gaps": True,
        }
        StrandSymmetric(**kw)

    def test_nr_nucleotide(self):
        """This is exercising a NonReversibleNucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
        ]
        sm = NonReversibleNucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A"])

    def test_nr_dinucleotide(self):
        """This is exercising a NonReversibleDinucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CG", "TG", forward_only=True),
        ]
        sm = NonReversibleDinucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CG>TG"])

    def test_nr_trinucleotide(self):
        """This is exercising a NonReversibleTrinucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CGA", "TGA", forward_only=True),
        ]
        sm = NonReversibleTrinucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CGA>TGA"])
        self.assertEqual(len(sm.get_motifs()), 64)

    def test_nr_codon(self):
        """This is exercising a NonReversibleCodon"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CG", "TG", forward_only=True),
            "replacement",
        ]
        sm = NonReversibleCodon(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CG>TG", "replacement"])

    def test_nr_protein(self):
        """This is exercising a NonReversibleProtein"""
        preds = [
            MotifChange("D", "K", forward_only=True),
            MotifChange("R", "V", forward_only=True),
        ]
        sm = NonReversibleProtein(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["D>K", "R>V"])
|