File: test_ns_substitution_model.py

package info (click to toggle)
python-cogent 2024.5.7a1%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 74,600 kB
  • sloc: python: 92,479; makefile: 117; sh: 16
file content (316 lines) | stat: -rw-r--r-- 52,054 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
import warnings

from unittest import TestCase

import numpy

from numpy import array, dot, empty
from numpy.testing import assert_allclose

from cogent3 import DNA, make_aligned_seqs, make_tree
from cogent3.evolve.ns_substitution_model import (
    DiscreteSubstitutionModel,
    General,
    GeneralStationary,
    NonReversibleCodon,
    NonReversibleDinucleotide,
    NonReversibleNucleotide,
    NonReversibleProtein,
    NonReversibleTrinucleotide,
    StrandSymmetric,
)
from cogent3.evolve.predicate import MotifChange
from cogent3.evolve.substitution_model import TimeReversibleNucleotide


def _make_likelihood(model, tree, results, is_discrete=False):
    """creates the likelihood function"""
    # discrete model fails to make a likelihood function if tree has
    # lengths
    if is_discrete:
        kwargs = {}
    else:
        kwargs = dict(expm="pade")

    lf = model.make_likelihood_function(tree, optimise_motif_probs=True, **kwargs)

    if not is_discrete:
        for param in lf.get_param_names():
            if param in ("length", "mprobs"):
                continue
            lf.set_param_rule(param, is_independent=True, upper=5)

    lf.set_alignment(results["aln"])
    return lf


_aln = {
    "Human": "ATGCGGCTCGCGGAGGAGCGGGCCGCGCTC------GCGGCGGAGAACGCGGATGGGGAACCCGGC---GCCGACCGACGACTGCGACTCCTGGGGACCTACGTGGCCATGAGCCTGCGGCCGGCTGCGGGCGCCTGGGAGCGTTGCGCGGGGAGTGCTGAGGCGGAGCAGCTGCTCCAGGCCTTCCTG---GGCCGCGATGCTGCCGAGGGGCCGCGGCCG------CTGCTGGTGGTGCGGCCCGGGCCCAGGGGCCTGGCAATACGCCCCGGGCTGGAGGTGGGACCTGAGTCGGGCCTGGCTGGCGCTAAGGCGCTTTTTTTCCTTCGCACCGGG---CCCGAGCCTCCAGGGCCCGACAGCTTCCGCGGCGCAGTGGTCTGCGGGGACCTGCCCGCGGCACCTCTGGAGCACCTAGCCGCGCTGTTCTCGGAGGTTGTTCTACCCGTCCTGGCCAATGAGAAGAATCGCCTAAACTGGCCCCACATGATATGTGAGGATGTCAGGCGGCACGCCCACAGCCTCCAATGTGACCTCTCAGTTATACTTGAGCAAGTGAAGGGAAAAACTTTGCTGCCTCTTCCAGCAGGCTCAGAAAAAATGGAGTTTGCGGATTCCAAAAGTGAGACAGTCTTGGATTCTATAGATAAGTCAGTCATCTATGCCATTGAGTCTGCAGTGATCAAATGGAGCTACCAAGTCCAGGTGGTACTCAAGAGAGAGTCTTCCCAGCCACTCTTACAAGGGGAGAATCCCACCCCTAAGGTGGAGTTGGAGTTCTGGAAGAGCAGGTATGAAGATCTGAAATACATCTATAATCAACTGAGAACAATAACGGTGAGGGGCATGGCCAAGCTCCTGGACAAGCTTCAGAGTAGCTACTTTCCAGCTTTCAAAGCCATGTACAGAGATGTTGTTGCAGCTCTAGCAGAGGCACAGGACATCCATGTGCACCTGATACCGCTCCAGCGCCACCTGGAAGCTCTGGAGAATGCAGAATTTCCGGAGGTGAAGCCCCAGCTGCGGCCCCTGCTCCACGTGGTCTGTCTGATTTGGGCCACATGCAAGTCCTACCGCTCCCCGGGAAGGCTGACTGTGCTGCTCCAGGAGATTTGCAACCTTCTCATCCAGCAGGCCTCTAATTATCTCAGCCCAGAAGACCTGCTGAGAAGTGAGGTAGAAGAAAGTCAGAGAAAACTGCAAGTGGTCTCAGACACTTTGAGCTTCTTCAAGCAAGAGTTTCAGGACAGAAGGGAGAATCTCCACACTTACTTCAAAGAGAACCAGGAAGTCAAGGAATGGGATTTCCAGTCTTCTTTGGTCTTTGTGCGATTGGATGGCTTCCTGGGACAACTGCACGTGGTGGAGGGTCTTCTGAAGACGGCCCTGGATTTCCACAAACTGGGAAAGGTGGAGTTCAGCGGCGTCAGAGGGAATGCTCTGAGTCAGCAGGTCCAGCAAATGCATGAAGAATTTCAAGAGATGTACAGGCTTCTCTCAGGATCCTCCTCCGACTGCCTGTACCTCCAAAGCACGGACTTTGAAAATGACGTCTCTGAATTTAACCAGAAAGTAGAAGATCTTGACCGAAGATTGGGGACTATCTTTATTCAAGCTTTTGATGATGCACCTGGCTTGGAGCATGCCTTTAAGCTGCTAGACATAGCAGGAAACCTCCTTGAAAGACCGCTGGTAGCGAGGGATACATCTGATAAATACCTGGTCCTCATCCAAATGTTCAACAAAGATCTGGATGCAGTGAGGATGATCTACAGTCAGCACGTCCAGGAGGAAGCAGAACTTGGGTTCTCCCCGGTGCACAAGAACATGCCCACCGTGGCTGGCGGCCTCCGCTGGGCACAGGAGCTGAGGCAGCGCATCCAGGGTCCTTTCAGCAACTTTGGACGCATCACACACCCTTGCATGGAATCTGCAGAAGGAAAGCGAATGCAACAAAAATATGAAGATATGCTGTCATTGCTAGAAAAGTATGAGACAAGACTTTATGAGGATTGGTGCCGGACAGTATCAGAGAAGTCACAGTACAATCTTTCCCAACCACTTCTAAAACGTGACCCAGAGACGAAGGAGATCACTATCAACTTTAACCCACAGCTGATTTCAGTGCTGAAAGAAATGAGCTATCTTGAACCCAGAGAGATGAAACACATGCCTGAGACAGCAGCAGCCATGTTCTCCTCCAGGGATTTCTATCGGCAGCTTGTGGCTAATTTAGAGTTGATGGCAAATTGGTACAACAAGGTTATGAAAACTCTGCTGGAGGTGGAATTTCCATTAGTGGAGGAAGAGCTGCAAAATATTGATCTCCGCCTCAGAGCAGCAGAGGAGACTTTGAACTGGAAAACAGAAGGCATTTGCGATTATGTCACTGAAATCACCAGTAGTATTCATGATCTTGAACAAAGAATTCAGAAAACTAAAGACAATGTGGAAGAGATCCAAAACATCATGAAAACATGGGTGACTCCAATATTTAAGACAAAAGATGGAAAAAGGGAATCCCTTCTTTCTCTGGATGATCGGCATGATCGAATGGAAAAATATTACAATCTCATCAAGGAATCTGGCCTTAAGATCCACGCCCTTGTTCAGGAAAACCTGGGTCTATTTTCAGCAGACCCAACCTCCAATATCTGGAAGACTTATGTTAACTCTATTGACAATTTGTTGCTGAATGGATTCTTTCTTGCCATTGAGTGCTCCCTCAAGTATCTTCTGGAAAATACTGAGTGTAAGGCAGGACTTACCCCAATATTTGAAGCACAACTGAGTCTAGCCATCCCAGAGCTAGTTTTCTATCCGTCTCTGGAGTCTGGAGTGAAGGGGGGTTTCTGTGACATTGTTGAGGGTCTCATCACCAGCATTTTTAGGATACCATCTCTGGTGCCACGGCTTTCCCCACAAAATGGCTCTCCTCACTATCAGGTCGACCTGGACGGTATACCAGATTTGGCAAACATGCGGCGCACACTCATGGAGAGAGTCCAGAGAATGATGGGCCTCTGCTGTGGCTATCAGAGCACCTTCAGCCAGTATTCGTACCTCTATGTGGAGGACCGGAAGGAGGTTCTGGGTCAGTTTCTGCTGTACGGGCACATCCTCACTCCGGAAGAAATTGAAGACCATGTGGAAGATGGCATCCCAGAGAACCCTCCCCTCCTTTCTCAGTTTAAAGTGCAAATCGACTCCTATGAAACGCTCTATGAAGAGGTGTGCAGGCTGGAACCCATCAAGGTGTTTGACGGCTGGATGAAAATTGATATTCGACCCTTTAAGGCATCTCTGCTGAATATTATTAAGAGGTGGAGCCTCCTGTTCAAACAGCATCTTGTGGACCACGTCACTCACAGCTTGGCCAACCTGGATGCGTTTATAAAGAAGAGTGAGAGCGGCTTACTCAAGAAAGTTGAAAAAGGAGATTTCCAAGGCTTGGTTGAGATCATGGGACACCTTATGGCTGTTAAAGAACGGCAGAGTAACACTGATGAGATGTTTGAGCCCTTAAAGCAGACTATTGAATTGCTGAAGACCTATGAACAAGAATTGCCAGAAACAGTGTTTAAGCAGCTGGAGGAGCTGCCTGAGAAATGGAACAACATAAAAAAGGTGGCCATTACTGTGAAGCAGCAGGTGGCCCCACTGCAGGCAAATGAAGTGACACTCCTCCGCCAGAGGTGCACAGCCTTCGATGCAGAACAGCAGCAATTCTGGGAGCAATTCCACAAAGAAGCCCCGTTCAGGTTTGATAGCATCCACCCTCATCAAATGCTGGATGCCAGGCACATCGAGATCCAGCAGATGGAATCCACTATGGCCTCCATTTCTGAGTCTGCCAGCTTATTTGAAGTCAATGTCCCTGACTATAAGCAGCTGAGGCAGTGCAGGAAGGAGGTCTGCCAGCTGAAGGAGCTCTGGGACACCATTGGAATGGTGACCTCCAGCATCCATGCCTGGGAGACCACACCCTGGAGGAATATCAACGTGGAAGCCATGGAGTTGGAGTGCAAACAGTTTGCCCGGCATATCCGAAACCTGGACAAGGAGGTCAGGGCCTGGGATGCATTCACAGGCCTGGAAAGCACTGTGTGGAACACGCTGAGCTCCCTGAGGGCAGTAGCTGAGCTGCAGAATCCAGCCATCCGGGAGCGGCACTGGAGGCAGCTGATGCAGGCCACCGGTGTGAGCTTCACTATGGACCAGGACACCACCCTAGCGCACCTGCTGCAGCTCCAGCTGCACCACTATGAGGATGAGGTCCGGGGCATTGTGGACAAAGCTGCAAAAGAGATGGGTATGGAGAAAACCTTAAAGGAGCTGCAGACTACCTGGGCTGGCATGGAATTCCAGTATGAGCCCCACCCACGGACCAATGTCCCCCTCCTGTGCTCTGATGAGGACCTCATAGAGGTTCTGGAGGATAATCAAGTTCAACTTCAGAACCTGGTGATGTCCAAGTATGTTGCTTTCTTCTTGGAGGAGGTGTCGGGCTGGCAGAAGAAGCTGTCCACAGTGGACGCTGTCATCTCTATCTGGTTTGAAGTGCAGCGAACATGGACTCACCTGGAAAGCATATTCACTGGATCTGAAGATATTCGGGCACAGCTACCCCAGGATTCTAAAAGGTTTGAAGGCATCGACATTGACTTTAAAGAGCTAGCTTATGATGCCCAGAAAATTCCAAATGTAGTGCAAACCACCAACAAGCCAGGCCTGTATGAAAAGCTGGAGGATATTCAGGGCAGATTGTGCCTGTGTGAGAAGGCCCTGGCAGAGTACCTCGACACCAAGAGGCTTGCCTTCCCGCGGTTTTACTTTCTCTCCTCCTCCGATCTGTTAGACATCCTTTCCAACGGCACAGCTCCACAACAGGTTCAACGTCACCTTTCCAAACTCTTTGACAACATGGCCAAGATGCGATTCCAGCTAGATGCCAGTGGGGAACCAACCAAGACAAGCCTCGGCATGTACAGCAAAGAAGAGGAGTATGTGGCTTTCAGTGAGCCCTGTGACTGCAGCGGGCAGGTAGAAATATGGCTGAACCATGTCCTTGGTCACATGAAGGCCACTGTGAGGCATGAGATGACAGAAGGTGTAACTGCCTATGAAGAAAAGCCGAGGGAGCAGTGGCTTTTTGACCACCCAGCTCAGGTGGCCCTGACCTGTACTCAGATCTGGTGGACAACAGAAGTGGGCATGGCATTTGCCAGGCTGGAGGAAGGCTATGAGAGTGCCATGAAGGACTATTATAAGAAGCAAGTGGCCCAGCTCAAAACCCTTATCACCATGCTGATTGGCCAGCTCTCCAAGGGAGACCGGCAGAAGATTATGACTATATGCACCATCGATGTGCATGCCCGGGATGTGGTAGCCAAGATGATTGCTCAGAAGGTAGACAATGCCCAGGCTTTCCTCTGGCTGTCTCAGCTGCGCCATCGTTGGGATGACGAGGTCAAACACTGCTTTGCCAACATCTGTGATGCCCAGTTTTTGTATTCCTATGAGTACCTGGGAAACACACCTCGCTTGGTGATCACACCTTTGACT------GACAGGTGCTACATCACCCTCACCCAGTCCCTGCACCTGACCATGAGTGGGGCTCCCGCAGGACCTGCAGGCACAGGCAAGACCGAGACCACCAAGGACCTGGGCCGCGCACTGGGCATCCTGGTCTATGTGTTCAACTGCTCGGAGCAGATGGATTACAAGTCTTGTGGCAACATCTACAAAGGCCTTGCTCAGACTGGTGCCTGGGGCTGCTTTGATGAGTTTAATCGAATCTCCGTGGAGGTCTTGTCAGTGGTGGCAGTGCAGGTAAAAAGCATTCAAGATGCGATTAGAGATAAGAAGCAGTGGTTCAGCTTCCTTGGGGAGGAGATCAGCCTGAATCCTTCTGTCGGTATCTTCATCACCATGAACCCAGGCTATGCTGGCCGCACAGAGCTGCCAGAGAATCTCAAGTCTCTCTTCAGGCCTTGTGCAATGGTGGTTCCAGACTTTGAATTGATCTGTGAAATCATGCTGGTGGCAGAAGGATTCATTGAAGCCCAGTCATTAGCCAGAAAGTTCATCACTCTTTACCAGTTGTGCAAAGAGCTTCTCTCCAAACAGGATCACTACGACTGGGGCCTACGGGCCATCAAGTCCGTGCTGGTGGTGGCAGGATCCCTGAAGAGAGGAGACCCTGACCGGCCTGAGGACCAGGTCCTGATGCGCTCCTTGCGGGATTTCAACATCCCCAAGATTGTGACTGATGACATGCCCATCTTCATGGGCCTGATCGGGGACCTCTTTCCCGCCCTGGATGTCCCCCGGAGGAGAGACCCCAACTTCGAAGCTTTGGTTAGGAAGGCGATAGTGGATCTGAAGCTCCAGGCTGAGGACAACTTTGTGCTCAAGGTGGTCCAGCTGGAGGAGCTCCTGGCTGTGCGGCACTCTGTA---TTTGTGGTGGGTGGCGCTGGT------ACCGGCAAGTCACAGGTGCTGAGGTCCTTGCACAAGACCTATCAG------ATCATGAAACGGCGCCCCGTCTGGACTGACCTCAATCCCAAAGCAGTCACAAATGATGAGCTCTTT---------------------------------------GGCATCATCAATCCAGCCACAGGAGAATGGAAGGATGGA---TTGTTCTCTTCCATCATGCGGGAGCTTGCCAACATCACCCATGATGGGCCCAAGTGGATTTTACTGGATGGCGACATAGATCCAATGTGGATTGAATCCCTGAATACTGTCATGGATGATAACAAGGTGCTGACATTGGCCAGCAATGAGAGGATTCCTCTGAACCCCACCATGAAGCTCCTCTTTGAGATCAGCCACCTGCGCACAGCCACTCCAGCAACTGTCTCTAGAGCAGGGATCTTGTACATCAACCCGGCAGACTTGGGATGGAACCCTCCAGTGAGCAGCTGGATTGAGAAGAGGGAAATCCAGACAGAGAGAGCCAACTTAACCATTTTGTTCGACAAGTATCTTCCAACCTGCCTAGACACACTCAGAACCAGGTTTAAGAAGATCATTCCCATCCCAGAGCAGAGCATGGTTCAGATGGTGTGTCACCTTCTGGAATGTCTCCTGACCACGGAGGACATCCCTGCAGACTGCCCTAAGGAAATTTATGAGCATTATTTTGTGTTTGCTGCCATCTGGGCTTTCGGCGGAGCAATGGTCCAAGATCAGCTTGTGGACTACCGGGCAGAGTTCAGCAAATGGTGGCTGACTGAGTTCAAAACAGTCAAGTTTCCTTCCCAAGGAACCATCTTTGACTATTACATCGACCCAGAGACCAAGAAATTCGAGCCTTGGTCCAAGCTCGTCCCCCAGTTCGAATTTGACCCCGAGATGCCCTTGCAGGCGTGTTTGGTGCACACGAGTGAGACCATCCGTGTGTGCTACTTCATGGAGCGGTTGATGGCGCGGCAGCGGCCTGTCATGCTGGTGGGCACGGCTGGCACTGGCAAGTCGGTGCTGGTGGGAGCTAAGCTGGCCAGCCTTGACCCCGAGGCATACCTGGTGAAAAACGTGCCATTCAACTACTACACCACGTCAGCAATGCTGCAGGCTGTCCTGGAGAAGCCTCTGGAAAAGAAGGCTGGCAGAAACTATGGCCCTCCAGGGAACAAGAAACTCATCTATTTCATTGATGACATGAACATGCCTGAGGTGGATGCCTACGGGACGGTGCAGCCCCACACCATCATCCGGCAGCATCTGGACTATGGCCACTGGTATGATCGGAGCAAGCTGTCCCTAAAGGAGATCACAAATGTACAGTATGTTTCCTGTATGAACCCCACGGCAGGCAGCTTCACCATCAACCCCCGGCTTCAGCGTCACTTCAGCGTGTTTGTCCTCTCCTTCCCGGGGGCAGATGCCCTGTCCTCTATCTACAGCATCATCCTCACTCAGCATCTGAAGCTCGGAAACTTCCCGGCGTCCCTGCAGAAATCCATCCCCCCACTGATCGATCTGGCCCTCGCCTTCCACCAGAAAATTGCTACCACCTTCCTACCCACAGGAATCAAATTCCACTACATCTTCAACCTCAGAGATTTTGCCAACATTTTCCAGGGCATTCTCTTCTCCTCAGTGGAATGTGTGAAATCCACATGGGATCTTATAAGGCTCTATCTGCATGAATCAAATCGAGTTTATCGGGATAAGATGGTAGAAGAAAAGGACTTTGATCTTTTTGATAAAATCCAGACAGAAGTGCTCAAGAAAACTTTTGATGATATTGAAGACCCTGTGGAGCAGACCCAAAGCCCGAACCTGTATTGTCACTTTGCAAATGGTATTGGGGAGCCCAAATACATGCCTGTACAGTCTTGGGAACTTTTGACCCAGACTCTGGTGGAGGCCTTGGAGAACCACAATGAAGTCAACACAGTGATGGACCTAGTTCTCTTTGAGGATGCCATGCGCCATGTCTGCCATATCAATCGCATCTTGGAGTCCCCGCGGGGAAATGCTCTGCTGGTTGGTGTAGGTGGGAGCGGCAAGCAGAGCCTGACAAGGCTGGCAGCTTTCATCAGCTCCATGGATGTCTTCCAGATCACACTGCGCAAAGGCTACCAGATCCAGGACTTCAAGATGGACCTGGCCAGCCTGTGTCTGAAAGCTGGAGTGAAGAATCTCAACACAGTGTTTCTCATGACTGATGCCCAAGTGGCTGATGAGAGGTTCCTTGTGCTCATCAATGATCTTTTGGCATCTGGGGAGATCCCAGATCTCTACTCTGATGATGAAGTTGAAAACATCATAAGCAATGTGAGGAATGAAGTCAAGAGCCAGGGTCTGGTTGACAACAGAGAGAACTGTTGGAAGTTCTTTATAGATCGGATCCGGCGACAGCTGAAGGTGACTCTCTGTTTCTCCCCTGTGGGAAACAAGCTAAGAGTCCGCAGCAGGAAGTTCCCAGCCATTGTGAACTGCACAGCCATCCACTGGTTCCACGAGTGGCCTCAGCAAGCATTGGAGTCTGTCAGCCTCCGCTTCTTGCAGAACACAGAGGGCATTGAGCCCACAGTAAAGCAGTCGATTAGCAAATTCATGGCCTTTGTCCACACAAGTGTCAACCAAACATCCCAGTCTTATCTGAGCAATGAACAGCGCTACAACTATACAACTCCCAAGTCCTTTCTGGAGTTCATCAGACTCTACCAGAGCTTGTTGCACAGGCACAGAAAAGAGCTCAAGTGCAAGACAGAGCGGTTGGAGAACGGGCTGCTGAAGCTGCATAGCACCTCTGCCCAGGTGGATGATCTGAAAGCAAAGCTGGCTGCCCAGGAAGTAGAGCTGAAGCAGAAAAATGAAGATGCAGACAAACTGATTCAGGTCGTGGGTGTGGAGACTGACAAAGTGAGCAGAGAGAAAGCCATGGCAGATGAAGAGGAGCAGAAGGTGGCCGTCATCATGCTAGAGGTGAAACAGAAGCAGAAGGACTGTGAGGAGGACCTGGCAAAGGCTGAGCCAGCACTCACAGCAGCGCAGGCAGCTCTCAACACCCTGAACAAGACCAACCTGACAGAGCTGAAGTCATTTGGCTCTCCGCCTCTGGCCGTCAGCAATGTCAGCGCTGCGGTGATGGTACTGATGGCTCCCAGGGGTAGGGTGCCCAAGGACCGGAGCTGGAAGGCTGCTAAGGTCACCATGGCCAAAGTGGATGGCTTCCTGGACTCGCTAATAAACTTCAACAAAGAGAACATTCACGAGAACTGCCTCAAAGCCATCAGGCCGTATCTGCAAGACCCCGAGTTCAATCCTGAGTTTGTGGCCACCAAATCCTATGCGGCTGCAGGCCTCTGCTCCTGGGTCATCAATATTGTGAGATTTTATGAGGTGTTCTGTGATGTGGAACCCAAGCGCCAGGCACTGAACAAAGCCACCGCGGACCTCACAGCTGCCCAGGAGAAGCTGGCTGCCATCAAAGCCAAGATCGCTCACCTTAATGAAAACCTGGCAAAGCTCACAGCCAGGTTTGAGAAAGCAACAGCAGACAAACTCAAATGTCAGCAAGAAGCCGAAGTGACCGCAGTCACCATCTCCCTTGCCAACCGCCTGGTTGGAGGACTCGCTTCTGAAAACGTGAGGTGGGCAGATGCCGTGCAGAACTTCAAACAGCAGGAAAGGACGTTATGTGGAGACATTTTACTTATAACGGCTTTCATTTCCTACCTTGGCTTCTTCACAAAGAAATACCGGCAGAGCCTCCTGGACAGAACTTGGAGGCCCTACCTGAGCCAGCTGAAAACTCCCATTCCAGTCACCCCAGCCCTGGATCCCCTGAGGATGCTGATGGATGATGCTGACGTGGCTGCCTGGCAGAACGAGGGCCTCCCAGCCGACCGCATGTCCGTGGAGAATGCCACCATTCTCATCAACTGTGAGCGCTGGCCACTCATGGTTGACCCTCAGCTACAAGGCATCAAATGGATCAAGAATAAATATGGTGAAGATCTCCGGGTCACGCAGATTGGTCAGAAAGGCTACCTTCAAATCATAGAGCAGGCCCTGGAAGCTGGAGCTGTGGTGCTGATTGAAAATCTAGAGGAGTCCATTGATCCTGTTCTGGGACCCCTGCTTGGGAGAGAAGTCATTAAAAAAGGACGATTCATTAAAATTGGAGACAAAGAATGTGAATACAATCCCAAGTTCCGGCTCATCCTCCACACCAAGCTGGCTAATCCTCACTACCAGCCTGAGCTGCAGGCTCAGGCCACCCTGATCAACTTCACCGTGACCAGGGATGGCCTGGAGGACCAGTTGCTGGCCGCTGTGGTCAGCATGGAGAGGCCAGACTTGGAGCAGCTGAAGTCCGATCTCACAAAGCAGCAGAATGGATTCAAAATTACCCTGAAAACGTTGGAAGACAGTCTTCTCTCTCGCCTCTCCTCCGCCTCTGGGAACTTCCTGGGAGAAACAGTGCTGGTGGAAAACCTAGAGATCACCAAGCAGACTGCTGCCGAAGTTGAGAAAAAGGTCCAGGAGGCCAAGGTGACTGAAGTGAAAATCAACGAGGCCCGAGAGCACTACCGGCCAGCAGCTGCCAGGGCCTCACTGCTCTACTTCATCATGAACGACCTCAGCAAGATCCATCCAATGTACCAGTTTTCTCTCAAGGCCTTCAGTATCGTCTTCCAGAAGGCTGTGGAG------AGGGCTGCTCCTGACGAAAGCCTCAGGGAGCGGGTGGCCAACCTAATAGACAGCATAACCTTCTCTGTGTACCAGTACACCATCCGCGGGCTCTTTGAGTGTGATAAGCTGACCTACCTTGCCCAGCTCACCTTTCAGATTCTCCTCATGAACCGAGAAGTCAATGCAGTGGAGTTGGATTTCCTGCTTCGATCTCCAGTGCAGACGGGCACCGCCAGCCCCGTGGAGTTCCTCTCCCATCAGGCGTGGGGAGCTGTCAAGGTACTTTCATCAATGGAAGAATTCTCTAATCTGGATCGGGACATAGAGGGATCTGCTAAGAGCTGGAAAAAGTTTGTGGAGTCCGAATGTCCTGAGAAAGAGAAGCTCCCACAGGAGTGGAAGAACAAGACAGCCCTGCAGCGCCTCTGCATGCTGAGAGCCATGCGGCCCGACCGGATGACCTATGCTTTGCGAGATTTTGTTGAAGAGAAGTTAGGAAGCAAATACGTGGTGGGAAGAGCCCTAGATTTTGCAACCTCATTTGAAGAATCGGGACCAGCCACTCCTATGTTTTTCATCCTGTCTCCAGGGGTGGACCCACTGAAGGATGTAGAAAGTCAAGGAAGAAAACTTGGATACACCTTCAACAATCAGAACTTTCACAACGTGTCTTTGGGGCAAGGACAGGAAGTGGTGGCTGAGGCTGCGCTGGACCTCGCTGCCAAGAAAGGTCACTGGGTTATTTTGCAGAACATTCACCTGGTGGCCAAGTGGCTCAGCACCCTGGAGAAGAAGCTGGAGGAGCACAGTGAGAACAGCCACCCAGAGTTCAGGGTCTTCATGAGTGCAGAGCCAGCACCCTCCCCTGAGGGCCACATCATCCCCCAGGGCATCCTGGAGAACTCCATTAAGATCACCAATGAGCCCCCCACGGGCATGCATGCCAACCTGCACAAGGCCCTGGACAACTTCACTCAGGACACTCTGGAGATGTGTTCTCGGGAGACGGAGTTTAAGAGCATCCTCTTTGCTCTTTGTTACTTCCATGCGGTGGTGGCAGAAAGACGAAAATTTGGGCCCCAGGGATGGAATCGCTCATACCCCTTTAACACTGGAGACCTCACTATCTCTGTGAATGTCCTCTACAACTTCCTGGAGGCCAACGCAAAGGTCCCCTATGATGATTTGCGCTACCTGTTTGGAGAGATCATGTATGGAGGCCATATCACAGATGACTGGGACAGAAGACTCTGCAGAACCTACCTGGGGGAATTCATTCGACCAGAAATGTTAGAAGGAGAACTGTCTTTGGCCCCAGGGTTCCCACTCCCAGGCAACATGGACTACAATGGTTATCATCAGTACATCGATGCTGAGCTGCCCCCAGAATCCCCCTACCTCTATGGCCTCCACCCGAACGCAGAGATTGGCTTCCTGACCCAAACCTCAGAAAAGCTCTTCCGCACTGTGCTGGAGCTGCAGCCTCGGGACAGCCAGGCCAGAGACGGAGCGGGCGCCACAAGAGAAGAAAAGGTCAAGGCACTTCTGGAAGAAATATTGGAGCGGGTGACAGACGAGTTTAACATCCCAGAACTGATGGCCAAAGTGGAGGAGCGCACCCCTTACATTGTAGTTGCCTTCCAGGAGTGTGGCCGGATGAATATCCTCACCAGAGAGATTCAGCGCTCACTGAGGGAGCTGGAGCTCGGCTTAAAGGGGGAGCTGACTATGACCAGCCACATGGAGAACTTACAGAATGCCCTGTACTTCGATATGGTGCCAGAGTCCTGGGCTAGACGAGCCTACCCTTCCACAGCAGGCCTGGCAGCCTGGTTTCCAGACCTCCTCAACAGAATCAAGGAGCTAGAGGCTTGGACGGGTGACTTTACAATGCCCTCCACTGTGTGGCTGACAGGCTTCTTCAACCCCCAGTCGTTCCTGACTGCCATCATGCAGTCCACGGCTCGCAAGAATGAGTGGCCACTGGACCAGATGGCCCTGCAATGTGACATGACGAAGAAGAACAGAGAAGAGTTTAGGAGTCCTCCTCGGGAAGGGGCCTACATCCATGGCCTCTTCATGGAAGGTGCCTGCTGGGACACACAGGCTGGGATCATTACAGAGGCAAAGCTGAAGGATCTGACACCCCCTATGCCTGTGATGTTCATCAAGGCCATTCCTGCAGATAAGCAGGACTGCCGCAGTGTCTATTCCTGTCCTGTGTACAAGACTAGTCAGCGGGGACCCACCTACGTGTGGACTTTCAACCTGAAGACTAAGGAAAACCCATCCAAGTGGGTTCTGGCTGGAGTAGCCTTGCTTCTCCAGATT",
    "Mouse": "ATGCCCGGCGCCAAGGAGCAGGCAGCGCTG---------GCGGAGTCTGGGGACGAGGAGCCTGGA------GACCCGAGGCTGCGGCTTCTGGGGACTTTTGTGGCTCGGAGCCTGCGTCCGGCCGCGGGCACCTGGGAGCGCTGTGCAGGCACAGCCGAGGCGGGGAGGCTGCTGCAGGCCTTCCTG---GATCACAACGCTGCCTCGGATCCGCGGCCA------CTGCTGGTGGTTCAGTCCGGGCCCGGGGGCCTGGTGGTGACACCCGGTCTAGACGCAGGACCAGAGCCCAGCCGAGCTCGCGCCAAGGGGCTCTTTTTCTTGCGCACTAAG---TCCGAGCCTCCGGGAAATCACAGCCTCCGCGGCACGGTGCTCTGCGGGGACCTACCCGCGGTGCCACTGGAGCACCTGGCCCCGCTGCTCTCAGAGGTCATTATTCCTGTCCTGGCAAATGAAAAGAACCATTTAGAATGGCCCCACATGGTATGTCAAGACATCAGACATCATGCCCACACCCTGAAGTCTGACCTCCTAGTGATCTTTGAGCACATGAAGGGGAGAACCTTGCTGCCTCTTCCAGTTGGCTCAGAAAAACTGGAGTTTGTGGATGGCCACAGTGAGCCAGTCTCAGATGCCATAGACAAGTCAACTCTCTATGCTGTGGAGTCTGCAGTGATCAAATGGAGCCACCAAGTCCAGGTGGTACTCAAGAGGGAGTCTTCTCAGGCACTCATACAAGGACAAAATCCCACCCCCAAGGTGGAGCTGGAGTTCTGGAAGAGCAGGTGTGAGGACCTGGAACACATTTATAATCAACTAATGACAATCAAGGTGAAGGGAATGGCTGAACTCCTGGACAAACTTCAGAGCAGCTACTTGCCAGCTTTCAAAGCCATGTTCAGAGACGTTGAAGCAGCCCTGACCGAGGCCCAGGACATCCATGTGCACCTGTTACCTCTCCAGCAACACCTGGACATCCTGGAAAACGTGGAGTTTCCCAAGGTGAAGGGCAGGCTGCGGCCTCTGCTCCATGTGGTCTGTCTGATTTGGGCCACCTGCAAATGGTACCGTTCCCCTGGGAGGCTCACAGTGCTGCTCCAAGAAATCTGCAACCTCCTCATCCAGCAGGCCTCTAATTACCTCAGCCCAGAAGACCTCCTGAGAAGTGAGGTGGAAGAGAGTCAGAAAAAACTGCAAGTGGTCTCAGATACCTTAAGCTTCTTCAAACAGGCATTCCAGGACAGAAGGGAGCACCTCCACACTTACTTCAAGGAGGATTCTGAAGTCAGGGTGTGGGATTTCCAAGCATCTCTGGTGTTTGTGCGACTGGATGGCTTTCTGGGCCGAGTGCACATGGTGGAGGATCTTCTGAAGACAGCCTTGGATCTCAACAATCTGGAAAAGCTTGAGTTCAGTGGCCTCAGAGGAAACTCCCTGAGTCAGAAAGTCCAACGCATGCATGAGGAATTTGAGGAGATGTACAAGGTCTTCTTGGACTGCTCCTATGACTGTTTGGACCCCAAGGGCACGGAATTTGAAAATGATGTCTGTGAGTTTAACAAAAGAGTGGAAGATCTTGACCGGAGACTGGGGACTATCTTAATTCAAGCTTTTGATGATGCACCTGATGTGGAACATGCCTTTAAGCTACTGGACATCACAGGAACCCTCATCAAAAGACCCCTGGTAGCACAGGATGTATCACAAAAATACCTGGCCCTCATCCGAATGTTCAGCACAGAACTGGATGCTGTGAGGGTCATCTACAGTCAGCACATCCAGAAGGAGGCAGAGCATGGATTCTCCCCCGTGCACAAGAACATGCCCACTATGGCTGGCGGCATCTGCTGGGCACAGGAACTGAGGCAGCGCGTCAAGGGTCCCTTTGGCAACTTCAAAAACATACCACATCTGTACTTGCAATCTGCTGAAGGAAAGCGAATGATACAAAAATACGAAGACTTGCTCTCCCTGCTAGAAGAGTATGAGAGAAGACTTTATGAGGACTGGTGTCAGACGGTATCTGAAAAGTCACAGTACAATCTTTCCCTACCTCTTTTGCATCGTGACCCCAACACAAAGCAGCTCTCTGTCAACTTTAACCCACAGCTGATTTCAGTGTTGAAAGAAATGAACTATCTTCAGCCCAGTGAGGTGAAAACCATCCCCGAGACCGCAGCAGCCATGTTCTCCTCCAGGGAATTCTATCGTCAGCTTGTGGCCAACTTGGAGTTGATGGCAAATTGGTACAACAAGGTTATAAAAATTCTGCTGGAGGTGGAATTTCCACTAGTGGAGGAAGAACTGCAAAATATTGATCTCCGCCTGAGAGCTGCAGAGGAGACTCTGAGCTGGAAAACAGAAGGCATTTGGGATTATGCTATGCAAATAACCAATAGCATTCATGACCTGGAACAAAGAATTCAGAAGACAAAAGACAATGTGGAAGAGATTCAAAACATCATGAAAACATGGGTGTCTCCAATATTCAAGAGAAAAGATGGGAAAAAAGAATGGCCCCTTTCTCTGGATGATCAGCAGGATCACATGGAAAAATACTACAGTCTCATCCAGGAATCTGGCCTTAAGATTCACGCTCTTGTTCAGGAAAACCTGGTTCTGTTTGCAGCAGACCCAGCATCCAGCATTTGGAAGTCTTACGTGAACTACATTGATTCCATGTTGTTGGATGGATTTTTTCTTGCCATTGAGTGTTCTCTCAAATATCTATTGGAAAACACTGAATGCAAGCCTGGACTCACCCCAGTATTTGAAGCACAGCTCAACCTTGTCACCCCAGAATTAGTTTTCCACCCCTCTCTGGACTCTGGGGTAAAGGGAGGCTTATATGACATTGTCCAGAGTCTTGTCACCAGAATTTTTGCTATGCCATCCCTCGTGCCACGGCTTTCCCCACACAGTGGCTCTCCTCACTATCAGGGTGACCTAGAGGACATGGCCGACTTAGCTGGTCTTCGGAGTGTGCTCATGGAGAGGGTACAGAATATGATGACCCTCTGCTGTGGCTATAGAAATACCCTCAGCCAGTATTCTTACCTGTATGTGGAGGATAGGAAGGAGATTCTTGGTCAGTTTCTGCTCTATGGGCATGTCCTCACACCTGAAGAGATAGAAGCCCATGCCGAAGACGGCATTCCAGAAAATCCACCCCTCCTCCATCACTTCAAAGACCAGATAGACTCCTATGAAAAGCTCTATGAGGAGGTGGTCAGCCTGGAACCCACCAAGGTGTTTGATGGCTGGATGCGAGTGGATGTGAGACCCTTCAAAGCATCTCTGCTGAACACAATAAAGAAGTGGAGCCTCATGTTCAAGCAACATCTTGTTGACTTTGTCACAAACAGCCTGTCTGACCTTGACTCATTCATAAGGAGCACCGAGAGTGGTTTGCTCAAGAGGGTGGAGAAAGGAGATTTCCAAGGATTGGTTGAGATCATGGGACATCTTGTCACCCTTAAAGAACGGCAGAGCAGCACCGATGACATGTTTGAGCCCCTGAAGCAAACGATTGAACTGCTGAAGTCCTACGAACAAGAGCTGCCAGAAACCGTGTTTAAGCAACTGGAGGAGCTTCCTGAGAAGTGGAAGAACATGAAGAAGATGGCCATCACTGTGAGGCAACAGGTGGCCCCTCTGCAGGCAAATGAAGTGGCCCTACTCCGCCAGCGGTGCTCAGCCTTCGATGATGAGCAGCAGCAATTCCAGGAGAGGTTCCGCAAAGAGGCCCCTTTCAGGTTTGATAGTATCAATCCACACCAAATGCTGGATGCCTGGCACGTGGAGATCCAGCACATGGAATCCACCATGGCAACCATCTCTAAGTCGGCTGATTTGTTTGAAGTCAATGTTCCTGACTACAAGCAGCTGAGGCAGTGCAGGAAGGAGGCCTGCCAGTTAAAGGAGCTCTGGGACACCATTGGAATGGTGACCTCCAGCATCCGTGCCTGGGAGGCCACCAGCTGGAGGAATATCAGTGTGGAAGCCATGGACTCAGAGTGCAAGCAGTTCGCCCGGCACATCCGCAACCTAGATAAGGAGTTCAGGTCCTGGGATGCATTCACGGGCCTGGAAAGCACAGTGTTGAACACCCTGACGTCCCTGAGGGCTGTGGCGGAGCTGCAGAATCCTGCCATCCGGGATCGGCACTGGAGGCAGCTGATGCAGGCCACCGGGGTGAATTTCACCATGAATCAGGATACCACCTTAGCTCATCTCCTGCAGCTTCAGCTCCACCACTTCGAGGATGAGGTCCGAGGCATTGTGGACAGAGCTGTCAAAGAGATGAGTATGGAGAAGACCTTAAAGGAATTGCAGACTACCTGGGCCAGCATGGAATTCCAGTACGAGTCCCACGCAAGAACCCGCGTACCCTTGCTGCAGTCAGATGAGGATCTCATTGAGGTCCTAGAGGACAATCAAGTGCAACTTCAGAACCTGATGATGTCCAAATATGTTGCTTTCTTCCTGGAAGAAGTGTCGAGCTGGCAGAAGAAGCTGTCCACGGCTGACTCGGTCATCTCTATCTGGTTTGAGGTGCAGCGCACCTGGTCTCACCTAGAGAGCATATTCATTGGCTCAGAAGATATCCGGGCTCAGCTACCCCAGGACTCTAAGAGATTTGAAGGCATTGACTCTGACTTCAGAGAGCTGGCGTATGATGCTCAGAAAACCCCAAATGTGGTGGAAGCCACAAATAAGTCAGGTCTCTATGAAAAGCTGGAGGATATACAAAGCAGATTATGCCTGTGTGAGAAAGCCCTAGCAGAGTATCTAGACACCAAGAGGCTCAGCTTCCCTCGCTTTTACTTCCTGTCCTCCTCTGACCTGCTGGACATCCTTTCCAATGGCACAGCTCCACAACAGGTTCAACGGCACCTCTCCAAGCTCTTTGACAACATGGCTAAGATGCAGTTCCAGTTAGATGCCAGTCAGAACCCAACCAAGACGAGCCTTGGCATGTACAGCAAAGAGGAGGAATATGTGGCCTTCAGTGAGGCCTGTGACTGCAGTGGGCAGGTTGAAATATGGCTGAACCGTGTTCTTCGTCACATGAAAGCCACTGTGAGGCATGAGATGACAGAGGGGGTCACTGCCTATGAGGAAAAGCCCAGGGATCAGTGGCTGTTTGATTACCCGGCTCAGGTGGCTCTGACCTGCACTCAGATCTGGTGGACGACAGAGGTGGGCATTGCATTTGCCAGGCTGGAGGAAGGCTATGAGAGTGCCATGAAGGACTACTATAAGAAGCAAGTGGCCCAACTCAAAACCCTTATCACCATGCTAATTGGGCCGCTCTCCAAGGGGGACAGGCAAAAGATCATGACCATATGCACCATCGATGTGCATGCCCGGGATGTGGTAGCCAAGATGATTGCTCAAAAGGTTGACAATGCCCAGGCTTTCCTCTGGCTGTCACAGCTACGACATCGTTGGGATGATGAGGCCAAGCACTGCTTTGCTAACATCTGTGACGCCCAGTTTCTATATTCCTATGAGTACTTGGGAAATACACCTCGCCTAGTGATCACGCCTCTGACT------GATAGGTGCTACATCACTCTCACCCAGTCTCTACACTTGACCATGAGTGGGGCTCCAGCAGGACCTGCAGGCACAGGCAAGACAGAGACCACCAAGGACCTGGGCAGAGCACTCGGCATCATGGTCTATGTGTTTAACTGTTCTGAGCAGATGGACTACAAGTCCTGTGGCAACATCTACAAAGGCCTGGCTCAGACTGGTGCCTGGGGCTGTTTTGATGAGTTTAACCGAATCTCTGTGGAGGTCTTGTCGGTGGTGGCTGTCCAGGTAAAAAGCATCCAGGATGCAATCAGAGACAAGAAGCAGAGGTTCAGCTTCCTTGGAGAGGAGATTAGCCTTGACCCTTCAGTGGGCATCTTCATTACCATGAACCCAGGCTATGCTGGCCGCACAGAACTGCCAGAGAACCTCAAGGCCCTTTTCAGGCCCTGTGCAATGGTAGTTCCAGACTTTGAGCTGATCTCTGAGATTATGCTGGTAGCAGAAGGATTCATTGAAGCCCGGTTGTTGGCCAGGAAGTTCATTACCCTTTACCGGCTGTGTAAAGAACTTCTCTCCAAACAGGATCACTATGACTGGGGACTTCGTGCCATTAAGTCTGTCCTCGTGGTAGCAGGATCACTGAAACGGGGAGACCCTGACCGCCCAGAGGACCAAGTCCTGATGCGTTCTTTGAGAGACTTCAACATCCCAAAGATCGTGACAGATGACATGCCGGTGTTCATGGGTCTGATAGGTGACCTCTTTCCTGCTTTGGATGTCCCCAGGAAGAGAGATCTGGACTTTGAGGCTGTGGTTCGGAAAGCAATCGTGGACCTTAAGCTCCAGGCTGAGGACAACTTTGTGCTCAAGGTGGTCCAGCTGGAGGAGCTGCTAGCTGTAAGGCACTCTGTG---TTCGTGGTGGGCGGTGCTGGT------ACCGGGAAGTCACAGGTACTGAGGTCTTTACACAAGACCTATCAG------ATCATGAGACGTCGCCCTGTGTGGACTGACCTCAACCCCAAAGCTGTCACAAATGATGAACTCTTT---------------------------------------GGCATCATCAATCCAGCCACTCGAGAATGGAAGGATGGA---CTGTTCTCTTCCATCATGAGAGAGCTTGCCATCATCTCTCATGATGGGCCCAAGTGGATCTTACTGGATGGCGATATAGACCCGATGTGGATAGAGTCTCTGAACACAGTCATGGATGATAACAAGGTACTGACCCTGGCAAGCAACGAGAGAATCCCCCTTAACCCCACAATGCGTCTTCTTTTCGAGATCAGCCACCTGCGCACAGCCACACCAGCAACCGTCTCCAGAGCAGGGATCCTGTACATAAACCCTGCAGACCTGGGATGGAACCCTCCAGTAAGCAGCTGGATTGATCAGAGAGAAGTCCAGACTGAGAGAGCCAACTTGACCATCCTGTTTGACAAATATCTTCCTACCTGCTTGGACACCCTCAGAACCAGATTTAAGAAAATAATTCCAGTCCCAGAGCAGAGTATGATCCAGATGCTGTGCTACCTCCTTGAGTGCCTCCTGACAAAGGAGGATATCCCTGCAGACTGCCCCAAGGAAATATATGAACTCTATTTTGTGTTTGCTGCCATCTGGGCATTTGGCAGTGCTGTGATCCAAGATCAGCTTGTAGACTACCGGGCAGAGTTCAGCAAATGGTGGCTGACTGAGTTTAAAACAGTCAAGTTTCCTTCCCAAGGAACTGTCTTTGACTACTACATAGACCCAGAGACCAAGAAATTTGAGCCCTGGGCCAAGCTCATCCCCCAGTTTGAATTTGACCCAGAGATGCCTTTGCAGGCTTGTTTGGTACACACAAGTGAGACCATCCGGGTGTGCTACTTCATGGAGCGGCTCATGCAATGGAGGCGGCCGGTTATGCTGGTTGGCCCTGCAGGCTCAGGCAAGTCTGTGCTGGTGGGAGCAAAGCTGTCCAGCCTTAACCCTGAGGAATACATGGTGAAAAATGTGCCCTTCAACTACTATACTACGTCAGCAATGCTGCAAGCTGTCTTGGAGAAACCTCTAGAAAAGAAAGCTGGCAGGAATTATGGCCCTCCAGGCAACAGGAAACTCATCTATTTCATCGATGACATGAATATGCCCGAGGTGGATGCCTATGGCACAGTACAGCCCCACACTGTCATCAGGCAGCACCTAGACTATGGCCACTGGTATGATCGGAACAAGCTGTCTCTGAAGGAGATCATGAATGTACAATACATCTCCTGTATGAACCCCACTGCAGGCAGCTTTACCATCAACCCAAGGCTTCAGCGCCACTTCAGCGTGTTTGCCCTCTGCTTCCCAGGAGCTGATGCCCTCTCTTCCATCTATAGCACCATCTTGACCCATCATCTGAAGTTTGGAAACTTTCCCACCACCCTGCAGAAATCCATCCCTCCTCTGATAAACCTGGCTGTCACCTTCCATCAGAAAATTGCCACCACGTTTCTGCCCACAGCAATCAAATTTCACTACATCTTCAATCTCAGAGATTTTGCCAATATTTTCCAAGGCATTCTTTTCTCCTCCGTGGAATGTGTAAAGTCCACACAGGACCTAGTGAAACTCTATCTGCACGAGTCAAGTCGGGTTTATCGGGATAAGATGGTGGAAGAAAAGGATTTCAATCTTTTTGACAAAATCCAAACAGAATTCCTCAAGAAAAATTTTGATGATAGTGAAGAGGTGCTGAAGCAGACCCAGAACCTGAACATGTATTGTCACTTTGCAAATGGCATTGGTGAGCCCAAGTACATGCCTGTGCAATCATGGGACCTTCTGAATCAGACTCTGGTGGAAGCCCTGGAGAGCCACAATGAAGTGAATGCTGTGATGGACCTAGTTCTCTTTGAGGATGCCATACGTCACATCTGTCACATCAACCGAATCCTGGAGTCCCCTCGAGGAAATGCCCTGCTAGTTGGTGTAGGTGGGAGTGGTAAGCAGAGTCTGACAAAGCTGGCAGCTTTTATCAGTTCCATGGATGTATTCCAGATCACCCTTCGCAAAGGCTACCAAATCCCTGACTTCAAGGTGGACCTGGCCAGCCTCTGTCTGAAAGCTGGGGTAAAAAATCTCAGTACAGTGTTCCTTATGACTGATGCCCATGTGGCTGACGAGAGGTTCCTGGTGCTCATCAATGACCTCCTGGCATCTGGTGAGATCCCAGATCTCTACTCTGATGAGGAAGAGGAGAACATCATAAACAATGTGAGAAATGAGGTCAAAAGCCAGGGACTCATGGACAGCAGGGAGAACTGCTGGAAATTCTTCATAGAGAGAGTCCAGCGACAACTTAAGGTGACTCTCTGTTTCTCCCCTGTGGGGAACAAGCTGAGAATTCGAAGCAGGAAGTTCCCAGCCATTGTGAACTGTACTGCTATCAACTGGTTCCATGAGTGGCCTCAGGAGGCCCTAGAGTCTGTGAGCCTCCGATTCTTGCAGAATACAAAGAACATTGAGCCTGCGGTGAAGCAGTCAATTAGCAAGTTCATGGCCTTTGTCCACATAAGTGTCAACAAGACATCCCAGTCATACCTGACCAATGAGCAGCGATACAACTACACAACACCCAAGTCCTTTCTGGAGTTCATCAGACTGTACCAGAGCTTGCTGGAGAGAAATGGAAAAGAGCTCCAGGCCAAGGTGGAGAGGCTGGAGAACGGGCTGTTGAAACTGCACAGCACCTCGGCCCAGGTGGATGATCTGAAAGCGAAGCTTGCCACCCAGGAAGTGGAGCTGAGGCACAAGAATGAAGATACAGACAAGCTGATTCAGGTGGTGGGTGTGGAGACCAGCAAAGTGAGCAGAGAGAAAGCCATTGCTGACGAGGAGGAGCAGAAGGTGGCCCTGATCATGCTGGAGGTGCAGCAGAAACAGAAAGACTGTGAAGAGGACCTGGCTAAGGCCGAGCCAGCCCTGACCGCAGCGCAGGCGGCCCTCAACACTCTCAACAAGACCAACCTGACAGAGCTGAAGTCATTTGGTTCCCCACCTCTGGCTGTCAGCAATGTCAGCGCCGCGGTGATGGTTCTCATGGCCCCAGGGGGCAAGGTGCCCAAGGACCGCAGCTGGAAGGCTGCCAAAATCACCATGGCCAAGGTGGACAGCTTCCTGGATTCCCTAATCCACTTCGACAAGGAGAACATTCATGAGAATTGCCTCAAAGCCATCAGGCCATACCTGCAAGATCCTGCATTCAACCCAGAGTTTGTGGCCACCAAGTCCTATGCAGCTGCAGGCCTCTGCTCTTGGGTAATCAATATTGTGAGGTTCTATGAGGTCTTCTGTGATGTGGAACCAAAGCGCCAGGCTTTGAACAAAGCCACCTCAGACCTCACAACTGCCCAAGAGAAGCTGGCAGCCATCAAAGCCAAGATCACACACCTTAATGAAAACCTGGCGAAGCTCACCACCAAGTTTGAGAAAGCAACAGCAGAGAAGCTCAAGTGTCAGCAAGAAGCTGAACTGACCGCAGGCACCATTTCGCTTGCAAACCGTCTGGTTGGAGGCCTTGCATCTGAGAACATAAGGTGGGCAGAGGCTGTGCAGAACTTCAGACAGCAGGAAAGGACGTTATGTGGCGACATTCTGCTTACTACAGCTTTCATCTCCTACTTGGGCTTCTTTACCAAAAAGTACCGAAAGAGCCTCATGGATGGGACCTGGAGACCCTATCTGAGCCAACTGAAAGTTCCCATTCCAACCACCCCAACTCTGGACCCCCTGAGGATGCTAACCGATGATGCTGAAGTGGCTGCCTGGCAGAATGAGGGTCTCCCTGCTGACCGCATGTCCATGGAGAATGCTACCATCCTCATCAACTGTGAGCGCTGGCCTCTCATGGTCGACCCTCAACTGCAAGGCATTAAATGGATCAAGAACAAATATGGAGAAGAACTCCGGGTCACCCAGATTGGCCAAAAGGGCTGCCTTCAAACCATAGAGCGAGCCCTGGAAGCTGGAGATGTGGTACTGATTGAGAACCTTGAGGAGTCCATTGATCCCGTCCTGGGACCTCTGCTTGGGAGAGAAGTCATTAAGAAAGGACGGTTTATCAAGATTGGAGACAAGGAGTGTGAATTCAATCCCAAGTTCCGGCTCATCCTTCATACCAAGCTGGCCAACCCTCACTACCAGCCTGAGCTGCAGGCTCAGGCTACCCTGATCAACTTCACGGTGACCAGGGATGGCCTGGAGGACCAGCTGCTGGCTGCTGTGGTCAGCATGGAGAGACCAGACCTGGAACAGCTGAAGTCCGATCTCACAAAGCAGCAGAACGGGTTCAAAATCACCCTCAAAACCTTAGAGGACAACCTGCTATCTCGCCTCTCTTCAGCCTCGGGGAACTTCCTGGGAGAAACAGCCTTGGTGGAGAACCTGGAGGTCACCAAGCAGACTGCTGCAGATGTGGAGGAAAAGGTCCAAGAAGCCAAATTGACAGAAGTAAAAATTAATGAGGCCCGAGAGCACTATAGGCCAGCAGCTGCCCGGGCATCTCTGCTCTACTTCATCATGAATGACCTCAGCAAGATCCATCCAATGTATCAGTTCTCCCTCAAGGCCTTCAGCATTGTCTTCCAGAAAGCTGTGGAG------AAGGCAGCTCCCAGTGAAAGTGTCACAGAGCGAGTGACTAATCTAATAGACAGCATAACTTTCTCAGTGTACCAGTATACCACACGTGGCCTCTTTGAGTGTGATAAGCTGACCTACCTAGCCCAGCTCACCTTTCAGATTCTCCTCGTGAACCAGGAAGTTAATGCAGCAGAGTTGGATTTCTTGCTTAGGGCTCCAGTACAGACAGGGACTCCCAGCCCAATGGAGTTCCTGTCCCACCAGGCCTGGGGAGGCATCAAGGCACTCTCATCAATGGAGGAATTCTGCAATCTGGACCGAGACATTGAAGGCTCTGCCAAGAGCTGGAAAAAGTTTGTGGAGTCAGAGTGTCCCGAGAAGGAGAAGTTTCCCCAGGAGTGGAAGAACAAGACAGCCCTGCAGCGCCTCTGCATGATGAGAGCCATGAGGCCTGACCGGATGACCTATGCCATGCGAGATTTTGTTGAGGAGAAGTTGGGAAGCAAATACGTGATGGGAAGAGCACTCGATTTTGTAACCTCATTTGAAGAGTCAGGACCAGCCACTCCCATGTTTTTCATCCTGTCTCCAGGGGTGGATCCACTGAAGGATGTGGAAAATCAAGGAAAGAAACTTGGATATACATTCAACAATCGGAACTTCCACAACGTGTCCCTAGGGCAAGGACAAGAGGTAGTTGCTGAGGCTGCACTGGACTTGGCTGCTAAGAAGGGTCACTGGGTGATTCTGCAGAACATCCACCTGGTGGCCAAGTGGCTCAGTACCCTGGAGAAGAAACTGGAGGAGCTCAGCGAGGAAAGTCACCCAGACTTCAGGGTCTTCATCAGCGCAGAGCCTGCACCCTCCCCTGAGGGCCACATCATTCCCCAGGGCATTCTGGAAAACTCCATTAAGATCACCAATGAGCCTCCCACAGGCATGCACGCCAACTTACACAAAGCCCTGGACAACTTCACTCAGGACACTCTGGAGATGTGTTCCCGGGAGACAGAGTTCAAGACCATCCTCTTTGCTTTGTGCTACTTTCATGCGGTGGTTGCCGAGAGACGGAAGTTTGGGCCACAGGGCTGGAATCGGTCCTATCCATTTAACACTGGGGACCTCACCATCTCTGTGAATGTGCTATATAATTTCCTGGAGGCTAACACAAAGGTACCCTATGACGACTTGCGTTACCTGTTCGGTGAGATCATGTACGGTGGCCATATCACAGATGACTGGGACAGGAGACTCTGCAGAACCTATTTAGAGGAATTCATTCGGCCAGAGATGCTAGAAGGAGAGCTCTCCCTGGCCCCGGGGTTCCCACTCCCAGGAAACATGGACTACAGTGGCTATCACCAGTATATTGATGCTGAGCTGCCCCCTGAGTCTCCCTACCTATATGGCCTCCATCCAAACGCCGAGATTGGCTTCCTGACCCAGACATCGGAAAAACTCTTCCGAACCGTGCTGGAGATGCAGCCTCGGGACAGCCAGGCTGGAGATGGAGCTGGCATCACAAGGGAAGAAAAGGTCAAAACCTTTCTGGAAGAAATACTGGATCGGATGACAGATGAATTTAACATCGCGGAGCTAATGGCTAAGGTGGAGGAACGCACCCCCTACATTGTAGTTGCCCTCCAGGAGTGTGAACGCATGAACATCCTTACCAGAGAGATCCAGCGCTCACTAAGAGAGCTGCATCTTGGCTTACAGGGGGAGCTGACCATGACCAGTGAGATGGAAAACCTACAGAATGCCCTATATCTAGATGTGGTCCCAGAGTCCTGGGCCAGGCGAGCCTACCCTTCCACAGCAGGCCTGGCAGCCTGGTTTCTAGACCTGCTTAACAGAATCAAAGAGCTGGAGTCCTGGACAGGCGACTTCTTGATGCCCTCAACTGTGTGGCTGACGGGCTTCTTCAACCCCCAGTCCTTCCTGACTGCCATCATGCAGTCCATGGCCCGCAAGAATGAATGGCCACTAGACCAGATGGCCCTGCAGTGTGATGTGACAAAGAAGAACAGAGAGGAGTTCCGGAGCCCTCCTCGGGAAGGGGCCTACATCTATGGGCTCTTCATGGAAGGTGCCTGCTGGGACACACAGACTGGGATCATTGCAGAGGCGAAACTGAAGGACCTGACACCCCCCATGCCTGTGATGTTCCTCAAGGCCATTCCAGCAGATAAGCAAGATTGTCGAAGTGTCTATGCTTGTCCTGTGTACAAGACTTGTCAGCGGGGACCCACCTACGTGTGGACTTTCAATCTGAAGACTAAAGAAAACCCATCCAAGTGGGTTCTGGCTGGTGTTGCCTTGCTTCTCCAGATT",
    "Opossum": "ATGCCAGTGAAAGTG------GCGGCGGTGTTCTCCGAGGCTGAGGATGGAGATAGCGAGAAAGGG---CCAGATCCTAGAATACGGCTCCTGGGCACCTACGTGATTCGGTGCTTGCGGCCAGCGGCTGGCGCCTGGAAGCGCTGTGTGAGCACGGCCGAATCTGAGCAGCTGCTCCAGGCCTTCCTGACAGGCTACTGCCCTCAAGAG---------CCG------ATGTTGGTGGTGCGTCCTGGGGGTGGGGGTCTGCAGCTGAGCTCCGGG------------CCATACCCCGATTTGTCTCGAGCCAAAGCGCTGTTCTTCCTGCGC---GGTTCCCCCAATCCCCCAGGACCCTTGGGTCCCAAAGGCGCAGTGCTCTGCGGGGAGCTGGCCCCGGCACCGTTGGAGCATCTGGCCACGCTGGTGGCTGAGGTAGTGATGCCTGTCCTGACCAATGAAAAGAATCACCAAGACTGGCCAAATGCTTTATATCAGGATGTAAGACGACACATCCACCACCTTCAAAGTGACCTCATTATCTTCCTGGGTCAAGTGACAGGAAAAACATTGCTGCCTCTTCCAGTAGGCTCAGAAAACATGGAATGTGCGGATTATGAAAATGAAAAAGACTTGGATTTCACAGATAAATCAATTGTCTATGCTATTGAGTCCACAGTGATCAAATGGAGCCACCAGATTCAAATAGTATTAAAGAGGGAATCTTCAGAACCACTCTTGCAGGGAGGAAATCCCACCCCAAAAGTGGAGCTGGAATTCTGGAAGAGAAGGTGTGAAGATCTGGAATATATTTATAATCAGCTGAGAGCCATAGAAGTGAGGGGCATAGCTGCACTTTTAAACAGACTTCAGAGCAGCTACTTCCCAGCCTTCAAAACCATGTTCAAAGATGTAATAGCAGCTTTAAGAGAGGCCCAGGATATCTATGTACATCTGAAGCCCCTGCAACGCCATGTGGAAAACATAGAGAATGTGGAATTTAGTGAAGTGAAGCCACTGGTGGGACCCCTGCTTCACGTAATCTTCTTGATTTGGTCCACATCTAAATACTACTGCTGTCCAGTACGGATCATTGTGCTACTGCAGGAAATATATAACCTTCTCATTCAGCAGGCCTGTACATATCTTAGCCCAGAAGATCTACTGAAAGGGGAGATAGAAGAGAGTCAGAGGAAAATACAGGTGGTGATGGACATTCTTAACTTTTCCAAAGAGATGTTTGAGAATAGGAGGAACAATCTCAAGACTTACTTCAAACAGGTCCGGGAGGTGAAGGAATGGGATTTCAATTCTTCTATGATCTTTGTGCGACTAAATAACTTCCTCAAAAGACTCAAGATGGTAGAGAATCTTCTGAAGACAACCTTGGATTTTCTCAAGCTAGAGAAACTTGAATTCAGTGGAATTAGAGGGAATGCTTTGAGTCAACAAGTCCAGGGCATGTATGAAGAATTCCAGAATGTGTACAAAGCCTTTTCAGAATGCTTCTATGACTGTCTGGACCCTAAACACACAGAATTCGAAAATTATGTTGCTGAATTCAACCACAAAGTAGAAGATTTGGACCGAAGATTAGGGATGGTCTTCCTCCAAGCTTTTAATGATGTATCTGGCTTAGAGCATGCTTTTAAGTTGCTTGAAATATTTGGGAGTCTGCTTGAAAGACCAGTAGTAGCCATGGATGTATTTGATAAATACCCAAGACTGATCACAATGTTCAGCAGTGACTTGGATGCTGTTAAGACAATCTATTGTCAGCATGTCCAGGAGGAGTCAGAACTTGGGTTTTCCCAATTACACAAGAACATGCCGGCAGTAGCTGGGGGTCTCCGCTGGGCTCAGGAACTCAGAGAACGAATTGAAATTTCATTCAATAACCTAAGACATATCAATCACCCCTATATGGAGTCCACTGAAAGTAAAGAGATGTTCCAAAAGTATGGTGAAATGTTTACATTGTTAGAAAAGTATGAGACAAAACTTTATGATAGCTGGTGCCAGACAGTGTCAGAGAAGTCACAATACAATCTCACTCAGCCACTCCTCTGTCATGATCCAGAAACTAAGCAGATCATTGTCAACTTTAACCCACAG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAAAC---ATGAAA------------------------------------------------------------------------------------------------------------------------------------------------------CTCTTCCTTGCAGATCCAGCATCCAGTATGTGGAAGGCATACATTGACTACCTTGATAACATGGTTCTTCATGGATTCTTCATTGCTATTGAATCTTCTCTAAATTATCTCCTGGAAAATACTGATTCTAAGAACAGACTTCCTCCCTTTTTTGAAACACAATTGGATCTAGTTATACCAGAACTGATCTTTTGTCCCTCTCTGGACCCTTTTGCTAATGGTGGTTTTCAAAGCATTGTGGAGGGACTTATCAATGACATTTTCAAAATATCTTCTATGGTCCCACGACTT---GCACAGAAAAGTTCCCCTCATTATCAGGCTGACATGGAGGACACAGAAGATCTGTCAAACCTGAGGAATATATTAATGGAGAGAGTCAAGAATATGATGACCATCTGCTGTGACTATCGGAACTCCTTTGACCATTACTCTTTCCTTTACATGGATGACCGAAAGGAATTTATGCGACAATTTCTTTTATATGGACATATGCTCACACCAGAGGATTTAGAAGCCCATGGAGAAGATGAGATCCCTGAAAATCCTCCCACTCTTCAGCAATTTAAATTGCAGATTGATTCCTATGAAAAGATTTATGAAGAGGTGAACCAGCTAGAGCCTTTCAGAATTTTTGACAACTGGATGAAAATTGATATTCGTCCATTTAAGATGACTCTACTAAATGTAATTAAGAGATGGAGCTTCATGCTTAAACAGTATCTCATTGATCATGTCACTCACAGTTTGGCAGACCTTGAAGTCTTTATAAGAAATGGTGAGAGTGGTTTAAACAAGAAGATAGAAAAAGGTGATTTTGCTGGTTTGGTTGAAATTATGAGAACTCTAATGGCTCTTAAAGAACGACAGAGCAGCACTGATGAGATGTTTGAGCCACTGAAACACACAATTGATTTACTGAAGATCTATGAACAAGAATTACCTGATTCAGTATTTAAACAGCTAGAGGAATTGCCAGAAAAATGGAGCAACATAAAGAAGATGGCAGTCATTGTAAAACAGCATGTAGCTCCCCTGCAGGAAGATGAAGTAACAGCTCTCCGTCAAAAATGTGCCATTTTTGCTATTGAACAGAATATATTCCAAGATCAGTTCCACAAAGAAGCTCCTTTCAGGTATGATAGCATTAAGCCTCACCAAGTCTTGGATGCCAAAAACATTCAAATCCAACAGATGGAATCTTTCATGTCCTCCATTTCAGACTCTACAAATTTGTTTGAAGTCAACATTCCTGATTACAAACATTTAAAACAATGCAGGAAGGAGATCTGCTTGTTGAAGGAGCTTTGGGATATGATTGACCTAGTGAATTTTAGCATCAATAACTGGAAGGTAACCAAATGGAGGAATATTAATGTAGAGAATATGGACTTAGAATGTAAAAGTTTTGCCAAACAAATATGGAAACTTGGCAAGGATGTGAGAGCCTGGGATGCTTTCATGGGGCTGGACAACACTCTGAAGAACATCCTAACATCCTTACGGGCTGTGTCTGAACTTCAAAATCCAGCCATAAGAGAGAGGCACTGGAACCAGCTGATGCAGGCTACAGGTATGAAATTCATCATGAATGATGATACCACCCTTGAAGACTTACTTAAGCTTGAGCTACACAACTTTGAAGAGGCGATCCAGGGCATTGTAGACAAAGCTGTGAAAGAGATGGACATGGAAAAAGTTCTAAAGGAACTAAAAGCCACTTGGGCAGGGATGGAATTTCATTATGAGCCCCACCCCCGGACAATGGTCCCACTGATGAGGTCTGATGAAGACCTTATTGAAACTCTTGAAGATAACCAAGTCCAGCTGCAGAATCTAATGACATCCAAGTACATAGCCTTCTTCCTGGAGGAAGTATCTGACTGGCAGAAGAAGCTCTCAATAGCTGATGCTGTCATTTCAATCTGGTTTGAAGTACAGCGTACATGGTCTCATCTTGAAAGCATTTTCATTGGATCTGAAGATATACGAGCACAGCTTCCTAAGGACTCTAAACATTTTGAAGACATTGATACTGATTTTAAAGAGTTAGTTTGTGATGCTCAGAAGACACCAAATGTGGTAGAAGCTACCAACAAACCAGGTGTTCATGAACAATTAGAAGATATTCAGAACAGGTTGAGCCAGTGTGAGAAAGCCTTGACTGAATATCTAGATACCAAGAGACTGGTCTTCCCCAGGTTTTATTTCCTCTCCTCTTCCGACCTTTTAGACATCCTTTCCAACGGCACAAATCCACAACAAGTTCAACGCCATCTTTCTAAGTTATTTGACAGTATGGCTAGGATGAAGTTCCAGGTGGACTCCAGTCAAAAACCAACCAAGAAAAGCCTCGGCATGTATAGCAAAGAGGAAGAGTATGTGAATTTCAGTGAGCCTTGTGACTGTAGTGGCCAGGTTGAACTCTGGTTGAATAATGTGCTCAATCACATGAGGGCCACCGTGAGACATGAAATGACAGAAGGTGTCACTGCCTATGAAGAGAAACCTAGGGAACAGTGGCTCTTTGACTACCCTGCTCAGGTGGCCTTGACATGTACCCAGATCTGGTGGACCACTGAGGTGGGAATTTCCTTTGCCAGGCTGGAAGAAGGATATGAAAGTGCCATGAAGGACTATTATAAGAAACAAGTTACCCAACTGAACACCCTGATCACCATGCTAATTGGCCAGCTCTCCATGGGTGACCGTCAAAAAATCATGACCATCTGTACTATTGATGTGCATGCTCGAGATGTAGTTGCCAAGATGATCTCTCAGAAGGTAGAGAACACTCAGGCTTTTCTCTGGATGTCCCAGATGCGCCATCGGTGGGATGATGAGAAAAAGCACTGCTTTGCCAATATTTGTGATGCTGAATTCCTGTATTCCTATGAGTACCTGGGTAACACACCTCGCCTGGTGATTACCCCTCTGACA------GACAGATGTTACATTACCCTCACCCAGTCTTTGCATTTGACCATGAGTGGGGCTCCAGCAGGACCTGCAGGCACTGGCAAAACTGAGACTACCAAAGACCTGGGTCGAGCATTGGGTATCATGGTATATGTGTTTAACTGTTCTGAGCAAATGGACTACAAGTCTTGTGGCAATATCTACAAAGGCCTTTCTCAGACTGGTGCCTGGGGCTGTTTTGATGAATTTAACAGAATTTCTGTGGAGGTTCTTTCCGTGGTGGCAGTACAGGTGAAAAGTATTCAAGATGCAATCAGAGATAAGAAGCTGAATTTCAATTTCCTTGGAGAGGAGATTAAATTGAATCCCTCAGTGGGAATCTTCATCACTATGAACCCAGGATATGCTGGCCGTACAGAGCTTCCAGAGAACCTTAAGGCTCTCTTCAGGCCTTGTGCTATGGTGGTACCAGATTTTGAATTGATCTGTGAAATCATGTTGGTTGCTGAAGGATTCATCGAGGCACAATCATTGGCCAAAAAATTCATTACCCTTTATCAGCTTTGCAAAGAGCTCCTCTCCAAACAGGACCATTATGACTGGGGTCTGCGAGCTATCAAGTCTGTGCTTGTGGTAGCAGGATCTCTGAAGAGAGGAGACCCTGACCGACCTGAAGACCAAGTCCTAATGCGTTCCCTCAGAGACTTCAATATTCCCAAAATCACAACTGATGACATGCCTGTGTTTATAGGCCTAATTGGAGACCTTTTCCCTGCCCTTGACATCCCCAGAAAGAGAGACCTTCACTTTGAAAGTTTTGTAAAACAGGCAATATTAGAGCTAAAACTTCAGGCTGAGGACAACTTTATGCTCAAAGTGGTACAGCTAGAAGAACTTTTGGCTGTGCGACACTCTGTG---TTTGTGGTGGGGAATGCTGGT------ACAGGAAAATCCCAGGTACTAAGATCTTTGCAAAAGACATACCAG------AACATGAAACGACGACCAGTCTGGACTGATCTCAACCCCAAAGCTGTCACTAATGATGAACTATTT---------------------------------------GGCATCATCAACCCAGCCACAAGAGAATGGAAAGATGGA---CTTTTCTCATCAATCTTGCGTGAACTTGCCAATATAATACATGATGGGCCCAAGTGGATTTTACTAGATGGCGATATTGATCCAATGTGGATTGAATCTTTGAACACTGTCATGGATGATAACAAGGTTCTGACTCTGGCAAGCAATGAAAGAATCTCTCTCAATCCAACTATGCGGCTCCTGTTTGAGATCAACCACTTGCACACAGCTACCCCTGCCACAGTCTCCAGAGCAGGAATACTATACATCAATCCTGCAGACCTGGGTTGGAATCCACCAGTGAGTAGTTGGATTGACAAGAGAGAGATACAGTCTGAACGAGCTAACCTGACCATCCTATTTGACAAATATTTGCCACCTTGCCTGGATGTAGTCAAAACAAGATTTAAAAAGATTGTTCCAATACCAGAGCAGAGCATGATTCAAATGCTGTGTTACATTCTTGAGTGTCTTCTAACAAAGGAAAACAGTCCCCCAGACTCTCCCAAGGAACTTCATGAACTTTATTTTGTGTTTGCTTCTATCTGGGCTTTTGGTGGAGTATTGATCCAAGACCAGCTTGTGGATTACAGAGCAGAGTTCAGTAAATGGTGGATAACTGAATTCAAGACAATCAAGTTTCCTTCCCAAGGAACAATCTTTGACTTTTACATAGAACCAGAAACAAAGAAATTTGAGCCGTGGTCCAAACTTATCCCCAAATTTGAATTTGACCCAGACCTACCTTTGCAGACTTGCCTGGTACACACTGTTGAGACCATTCGTGTGTGCTATTTCATGGAGCAGCTCCTGAAACACCGGAGACCTATCATGTTGGTGGGGAATGCAGGCACTGGGAAATCTGTTCTGGTAGGGGCAAAATTGGCTACCCTGGATGCAGATGAATACATGGTGAAGAATATCCCATTTAATTATTATACTACATCTGCAATGCTACAAGCTGTCCTAGAGAAACCTCTAGAAAAGAAAGCTGGAAGAAATTATGGCCCACCTGGCACCAAGAAACTCATCTATTTCATTGATGATATGAACATGCCTGAGGTAGATGCATATGGAACAGTGCAACCCCACACACTCATCAGACAGCATATGGACTATGGGCACTGGTATGACAGAAATAAACTGTTCCTCAAAGAAATCATGAATGTACAATATGTGGCCTGTATGAATCCGACAGCTGGTGGCTTCTCTATCAATCCTCGGCTACAGCGTCATTTTAGCATCTTTGTGCTCTCCTTCCCTGGAGTAGATGCATTGTATTCAATCTATAGCACCATCTTGACTCAGCATCTAAAACTTGGTAACTTTCCAGCATCACTGCTGAATTCCACCCCCCGACTCATTAACTTGGCCATTACCTTCCATCAGAAGATTGCTGCCACTTTTCTCCCCACAGCAATAAAATTCCATTATATCTTCAATCTCCGAGATTTCTCCAACATTTTCCAAGGCATTCTCTTTTCAACAGTGGAAAGTGTTAAAACCACATCAGACCTTGTGAAGCTCTATCTTCATGAGTCCAATCGGGTTTATCGCGATAAGATGGTTGAAGACAATGACTTTGACAACTTTGATAAAATCCAAATTGAAGTGGTAAATAATTTCTTTGATGATATGGACAAGACTCTAGAGGAAATCAAGAGATTGAATATGTACTGCCACTTTGCAAATGGTATTGGTGAGCCCAAATATATGCCAGTGAAGACATGGGAACTGCTTACCCAAATCCTGGTGGAAGCCTTAGAGAACCACAATGAAGTCAATCAAGTGATGAATCTGGTTCTTTTTGAGGATGCCATGTGCCATGTTTGTCGTATCAATCGCATCCTAGAATCCCCAAGGGGGAATGCTTTGCTGGTTGGAGTAGGTGGAAGTGGCAAACAGAGCCTGACAAGACTTGCAGCTTTTATCAGCTCCATGGATGTTTTCCAGATCACTCTAAGAAAAGGTTATTGTATTTCTGATCTTAAGATGGATTTAGCCAATCAGTGCCTAAAAGCTGGAGTAAAGAATGTAAGCACTGTATTTCTCATGACAGATGCCCAAGTTGCTGATGAAAAGTTCCTTGTCCTTATCAATGATCTGTTGGCATCTGGAGAGATTCCAGATCTATACTCTGATGATGAAGTTGAAAACATCATAAACAATATGAGAAATGAAGTCAAGAGCCTGGGTTTGTTTGACAGCAGGGAAACTTGCTGGAAAGTCTTCATAGAAAGGGTCCAAAAACAATTAAAGGTTATACTCTGCTTCTCCCCTGTGGGGAATAAGCTAAGAGTCCGCAGCAGGAAATTTCCAGCCATTGTGAACTGCACAGCCATTGATTGGTTTCATGAGTGGCCTCAGCAAGCACTAGAATCAGTGAGCCTGCGCTTCTTACAAAACATAGAGAATATTGATCCAGCAGTAAAAGAGTCAATTAGTAAATTCATGGCTTATGTACACACGAGTGTCAACCAAATGTCCCAGTCCTACCTGAGCAATGAACGGCGCTATAATTACACCACCCCAAAATCCTTCCTGGAACAGATCAGACTCTATCAGAACCTACTGGTCAAGAATGGCAATGAGCTGACATCTAAAATGAAGAGGCTTAAGAATGGACTGCAGAAGCTTCACAGTACATCTTCCCAGGTAGATGACCTGAAAGCTAAACTGGCACTCCAGGAAATAGAGCTCAGGCAGAAGAATGAAGATGCAGACAAACTAATTCAAGTGGTTGGAGTAGAGACAGAGAAAGTAAGCAAAGAGAAAGCCATTGCTGATGAAGAGGAACATAAAGTGGCTCTAATCATGTTGGAGGTCAAGCAGAAGCAAAAGGACTGTGAGGAAGATCTGGCCAAAGCAGAGCCATCTCTCACAGCAGCCCAAGAAGCTCTCAACACACTCAATAAGACCAACCTAACAGAGCTCAAGTCTTTTGGTTCACCACCTTTGGCTGTCAGCAATGTCACTGCTGCAGTAATGGTTCTCATGGCCCCTGGAGGGAAGGTACCCAGAGATCGAAGTTGGAAAGCTGCCAAGGCTACCATGGCCCGAGTTGATGGCTTTCTGGACGCCCTGGTCAACTTTAACAAAGAGAACATACCTGAGAGCTGCCTCAAAGCTATCCAACCATATATTCAAGATCCAGAATTTAAACCTGAGTTTGTGGCCTCTAAGTCTTTTGCAGCAGCTGGCCTCTGTTCCTGGGTCATAAATATTGTGAGGTTTTATGAGGTTTTCTGTGATGTGGAACCCAAGCGGCAAGCCCTGAGTAAAGCAAGTTTGGATCTTGCTATTGCCCAAGAAAAATTGGCAACCATTAAAATCAAGATTGCTCACCTTAATGAAAACTTGGCAAAACTTACAACCAAATTTGAGAAAGCAACTGCAGAAAAACTCAAATGTCAGCAAGAAGCTGAACTGACCACAGGTACCATCTCACTTGCAAATCGCCTGGTTGGAGGTCTTGCCTCTGAAAATGTAAGATGGGCAGAAGCTATTAAGGACTTCAGACAGCAAGAGAATACATTGTGTGGAGATATTTTACTGATTACAGCTTTCATTTCTTACCTGGGATACTTTACCAAGAAATATCGTCAAAATCTTATGGATTGTAGCTGGAGACCTTACCTGAATCAGTTAAAAGTGCCTTTTCCAGTCACCCCTACTTTAGATCCTCTAAAGATGCTTACTGATGATGTAGACATAGCCACCTGGCAAAATGAGGGTCTTCCTGCTGACCGTGTGTCCACAGAGAATGCTACAATCCTTATCAACAGTGAACGATGGCCACTTATAGTTGACCCTCAACTACAAGGAGTCAAATGGATTAAGAAGAAATATGGCAAAGACCTTCGAGTCATCCAGATTGGAGAGAAAGGATACCTTGATATCATAGAACATGCCCTAGCAGATGGTGATGTAGTGCTGATTGAAAACATAGGAGAGACAGTGGACCCTGTTCTAGGACCCTTGTTGGGAAGAGAAGTAATTAAAAAAGGAAGATTCATTAAAATTGGAGACAAGGAGTGTGAATATAATCCCAGGTTCCGCCTCATTCTTCACACCAAGCTTGCCAATCCTCACTATCAGCCTGAGCTCCAAGCCCAGGCAACGCTTATCAACTTCACTGTGACTAGAGATGGCTTAGAGGATCAGCTTCTGGCAGCAGTGGTCAACATGGAGAGGCCAGACTTAGAAGAACTGAGATCAGATCTGACAAAGCAACAGAATGCATTCAAGATCACACTGAAAACCTTGGAAGACAACTTGCTGTCTTGCCTCTCCTCCGCATCTGGGAACTTCCTGGGAGATATGGCTTTAGTAGAAAACTTAGAGATCACCAAAAAGACAGCTACAGAAATTCAGAAGAAGGTCCAGGAAGCCAAGATAACAGAAATAAAAATTAATGATGCCAGAGAGCATTACCGGCCAGTGGCAGCACGTGCGTCTTTGCTGTACTTCATCATGAATGACCTCAGTAAAATCCATCCCATGTATCAGTTTTCTCTCAAGGCTTTCAGCCTTGTCTTCCAAAAGGCTATACAGAAGACCAAGGCAGATGCTGATGAGAACCTCCAACAGAGAGTAGTCAGTTTGATTGACAACATCACCTTCTCTGTATATCAGTATACTACTAGGGGACTTTTTGAATGTGATAAGCTGACTTACATCACCCAGGTCACCTTTCAGACACTCATGATGAATCAGGAAATCAATGCTGCTGAATTGGACTTCCTTCTCCGGTACCCAGCACAGTCCAATGTTCTGAGCCCTGTGGATTTCCTTTCCAACCAATCCTGGGGAGGTATCAAGACTCTTTCATTAATGGAAGAATTTTGTAATCTGGATCGAGACATTGAAGGGTCTGCCAAACATTGGAAAAAATTTGTGGACTCAGAGTGTCCTGAAAAGGAGAAATTCCCCCAAGCATGGAAAAACAAGTCATCTCTACAAAAACTGTGTATGATGAGGGCAATGAGACCTGACCGAATGACATATGCTATGAGAGATTTTGTGGAAGAAAAGCTAGGAAGCAAATATGTGGTGGGCAGAGCATTAGATCTCTCTACCTCTTTAGAAGAATCAGGGTCTGCAACTCCCATGTTCTTCATACTGTCGCCAGGTGTTGACCCACTGAAAGATGTGGAGAAGGAAGGGAAGAAACTTGGTTATACCTTCAACAACCAGAACTTCCATAATGTGTCCTTGGGTCAAGGACAAGAGGTAGTAGCAGAGGCTGCATTGGATCTGGCTGCCAAGAATGGCCACTGGGTTATCTTACAGAACATCCATCTGGTAGCCAAATGGCTTGGATCCCTTGAGAAGAAGTTGGAACAACATAGCAAGAGCAGTCATCATGAGTTCAGAGTCTTCATGAGTGCTGAGCCTGCAGCTTCTCCTGATGGTCACATCATTCCCCAAGGCATCCTAGAAAATTCAATAAAGATCACTAATGAGCCTCCCATGGGCATGCATGCCAAACTGCACAAGGCCCTGGACAACTTCACTCAGAATACTCTGGAAATGTGTACCCGAGAAACAGAGTTTAAGAGTATTTTATTTGCACTTTGTTACTTCCACGCTGTTGTAGCAGAAAGGCACAAATTTGGACCTCAAGGCTGGAATCGCACTTACCCTTTTAACACTGGAGACCTTACCATCTCAGTGAATGTGCTCTATAACTTTCTTGAGGCCAATGCCAAGGTACCCTATGATGACTTGCGCTATCTCTTTGGTGAGATCATGTATGGAGGTCACATCACAGATAACTGGGACAGGAGGCTTTGTAGAACATACCTGGAGGAATTCATTAAGCCAGAAATGTTAGAAGGAGAAATGTTTCTGGCTCCAGGGTTCCCGATGCCAGGCAATATGGATTACAACAGTTATCACCAGTTCATTGATGACATGCTGCCAACAGAGTCACCATATTTGTATGGGCTTCACCCCAATGCTGAAATTGGCTTTCTGACCCACACCGCAGAAAAGCTCTTCCATACAGTGCTAGAAATGCAGCCTCGGGACAGCCAAGATGGAGATGGAGGAGGGATCACAAGAGAAGAAAAGGTAAAAGCCTTTCTGGATGAAATATTAGAGAAGATAACTGAAGAGTTTAACATTGCAGAGTTGATGGCTAGGGTAGAAGAACGTACCCCCTACGTTGTGGTTACCTTTCAAGAATGTGAAAGAATGAACCTCCTTATCAGAGAAATACAGCACTCGTTGAAAGAACTGGATCTGAGTCTGAAGGGTGAGCTAACAATGACCAGCAACATGGAGAGCTTACAGAGAGCTCTGTACCTAGATACTGTACCAGCGTCGTGGGCCAAGAAAGCATACCCATCAACAGCAGGCCTGGCAAGCTGGATTGTGGATCTGCTCACCCGAATTAAAGAGCTGGAAAGATGGATGGGAGACTTTGCATTACCCTCTGCTGTCTGGCTAGCAGGATTTTTTAACCCCCAATCATTCCTAACGGCCATCATGCAGTCCATGGCTCATAAGAATAAGTGGCCACTGGATAAGATAGCCCTGCAGTGTGAAGTGACTAAGAAGAATCGGGAAGATTTCAGTAGCCCACCTCGGGAAGGGGCCTATATCTATGGTCTGTTCATGGAAGGGGCCTGTTGGGATACACAGGCTGGAAATATCACAGAGGCTAGACTGAAGGATCTGACTCCACTCATGCCTGTGATATTCATCAAAGCTGTCCCTGTGGACAAGCAAGACAATCGAAATATCTATCCTTGTCCAGTATACAAGACTTGTCAGCGGGGACCCACTTATATTTGGACATTTGGTCTGAAAACTAAAGAAGCTCCATCCAAATGGGTATTAGCTGGTGTGGCCTTGCTTTTGCAGATT",
}


class MakeCachedObjects:
    def __init__(self, model, tree, seq_length, opt_args):
        """simulates an alignment under F81, all models should be the same"""
        self.lf = model.make_likelihood_function(tree)
        self.lf.set_motif_probs(dict(A=0.1, C=0.2, G=0.3, T=0.4))
        self.aln = self.lf.simulate_alignment(seq_length)
        self.results = dict(aln=self.aln)
        self.discrete_tree = make_tree(tip_names=self.aln.names)
        self.opt_args = {**opt_args, **{"show_progress": False}}
        self.tree = tree

    def fit_general(self, **kwargs):
        optargs = self.opt_args.copy()
        # optargs.update(kwargs)
        if "general" in self.results:
            return
        gen = General(DNA.alphabet)
        gen_lf = _make_likelihood(gen, self.tree, self.results)
        gen_lf.optimise(**optargs)
        self.results["general"] = gen_lf
        return

    def fit_gen_stat(self, **kwargs):
        optargs = self.opt_args.copy()
        # optargs.update(kwargs)
        if "gen_stat" in self.results:
            return
        gen_stat = GeneralStationary(DNA.alphabet)
        gen_stat_lf = _make_likelihood(gen_stat, self.tree, self.results)
        gen_stat_lf.optimise(**optargs)
        self.results["gen_stat"] = gen_stat_lf

    def fit_constructed_gen(self, **kwargs):
        optargs = self.opt_args.copy()
        optargs.update(kwargs)
        if "constructed_gen" in self.results:
            return
        preds = [
            MotifChange(a, b, forward_only=True).aliased(f"{a}/{b}")
            for a, b in [
                ["A", "C"],
                ["A", "G"],
                ["A", "T"],
                ["C", "A"],
                ["C", "G"],
                ["C", "T"],
                ["G", "C"],
                ["G", "T"],
                ["T", "A"],
                ["T", "C"],
                ["T", "G"],
            ]
        ]
        nuc = NonReversibleNucleotide(predicates=preds)
        nuc_lf = _make_likelihood(nuc, self.tree, self.results)
        nuc_lf.optimise(**optargs)
        self.results["constructed_gen"] = nuc_lf

    def fit_discrete(self, **kwargs):
        optargs = self.opt_args.copy()
        optargs.update(kwargs)
        if "discrete" in self.results:
            return
        dis_lf = _make_likelihood(
            DiscreteSubstitutionModel(DNA.alphabet),
            self.discrete_tree,
            self.results,
            is_discrete=True,
        )
        dis_lf.optimise(**optargs)
        self.results["discrete"] = dis_lf

    def __call__(self, obj_name, **kwargs):
        if obj_name not in self.results:
            funcs = dict(
                general=self.fit_general,
                gen_stat=self.fit_gen_stat,
                discrete=self.fit_discrete,
                constructed_gen=self.fit_constructed_gen,
            )

            funcs[obj_name](results=self.results, **kwargs)
        return self.results[obj_name]


class NonStatMarkov(TestCase):
    """test discrete and general markov"""

    tree = make_tree(treestring="(a:0.4,b:0.4,c:0.6)")
    opt_args = dict(max_restarts=1, local=True, show_progress=False)
    make_cached = MakeCachedObjects(TimeReversibleNucleotide(), tree, 100000, opt_args)

    def _setup_discrete_from_general(self, gen_lf):
        discrete_tree = self.make_cached.discrete_tree
        dis_lf = _make_likelihood(
            DiscreteSubstitutionModel(DNA.alphabet),
            discrete_tree,
            dict(aln=self.make_cached.aln),
            is_discrete=True,
        )

        for edge in self.tree:
            init = gen_lf.get_psub_for_edge(edge.name)
            dis_lf.set_param_rule("psubs", edge=edge.name, init=init)
        dis_lf.set_motif_probs(gen_lf.get_motif_probs())
        return dis_lf

    def test_discrete_vs_general1(self):
        """compares fully general models"""
        gen_lf = self.make_cached("general", max_evaluations=2)
        gen_lnL = gen_lf.get_log_likelihood()
        dis_lf = self._setup_discrete_from_general(gen_lf)
        assert_allclose(gen_lnL, dis_lf.get_log_likelihood())

    def test_paralinear_consistent_discrete_continuous(self):
        """paralinear masure should be consistent between the two classes"""
        gen_lf = self.make_cached("general", max_evaluations=2)
        dis_lf = self._setup_discrete_from_general(gen_lf)
        ct_para = gen_lf.get_paralinear_metric()
        dt_para = dis_lf.get_paralinear_metric()
        keys = sorted(ct_para)
        assert_allclose([ct_para[k] for k in keys], [dt_para[k] for k in keys])

    def test_general_vs_constructed_general(self):
        """a constructed general lnL should be identical to General"""
        sm_lf = self.make_cached("constructed_gen", max_evaluations=25)
        sm_lnL = sm_lf.get_log_likelihood()
        gen_lf = self.make_cached("general", max_evaluations=0)
        rules = sm_lf.get_param_rules()
        gen_lf.apply_param_rules(rules)
        gen_lnL = gen_lf.get_log_likelihood()
        assert_allclose(sm_lnL, gen_lnL, rtol=0.1)

    def test_general_stationary(self):
        """General stationary should be close to General"""
        gen_stat_lf = self.make_cached("gen_stat", max_evaluations=25)
        gen_lf = self.make_cached("general", max_evaluations=25)
        gen_stat_lnL = gen_stat_lf.get_log_likelihood()
        gen_lnL = gen_lf.get_log_likelihood()
        self.assertLess(gen_stat_lnL, gen_lnL)

    def test_general_stationary_param_list(self):
        """general stationary returns parameter list"""
        gs = GeneralStationary(DNA.alphabet)
        params = gs.get_param_list()
        self.assertTrue(params != [])

    def test_general_stationary_is_stationary(self):
        """should be stationary"""
        gen_stat_lf = self.make_cached("gen_stat")
        mprobs = gen_stat_lf.get_motif_probs()
        mprobs = array([mprobs[nuc] for nuc in DNA.alphabet])
        for edge in self.tree:
            psub = gen_stat_lf.get_psub_for_edge(edge.name)
            pi = dot(mprobs, psub.array)
            assert_allclose(mprobs, pi)

    def test_general_is_not_stationary(self):
        """should not be stationary"""
        gen_lf = self.make_cached("general", max_evaluations=5)
        mprobs = gen_lf.get_motif_probs()
        mprobs = array([mprobs[nuc] for nuc in DNA.alphabet])
        for edge in self.tree:
            psub = gen_lf.get_psub_for_edge(edge.name)
            pi = dot(mprobs, psub.array)
            try:
                assert_allclose(mprobs, pi)
            except AssertionError:
                pass

    def test_strand_symmetric(self):
        """StrandSymmetric should fit a strand symmetric model"""
        warnings.filterwarnings("ignore", "Model not reversible", UserWarning)
        taxa = "Human", "Mouse", "Opossum"
        aln = make_aligned_seqs(data=_aln, moltype=DNA)
        aln = aln[2::3].no_degenerates()
        tree = make_tree(tip_names=taxa)
        model = StrandSymmetric(optimise_motif_probs=True)
        lf = model.make_likelihood_function(tree)
        lf.set_alignment(aln)
        for param, val in [
            ("(A>G | T>C)", 2.454),
            ("(A>T | T>A)", 1.5783),
            ("(C>G | G>C)", 0.6687),
            ("(C>T | G>A)", 6.7026),
            ("(G>T | C>A)", 0.9219),
        ]:
            lf.set_param_rule(param, init=val)

        order = "ACGT"
        S = array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]])
        P = empty((4, 4))
        for edge in taxa:
            Psub = lf.get_psub_for_edge(edge)
            for i in range(4):
                for j in range(4):
                    P[i, j] = Psub[order[i]][order[j]]
            numpy.testing.assert_almost_equal(P, S.dot(P).dot(S))

    def test_nsGN(self):
        """ssGN correctly ignores provided args"""
        kw = {
            "model_gaps": False,
            "name": "StrandSymmetric",
            "optimise_motif_probs": True,
            "recode_gaps": True,
        }
        StrandSymmetric(**kw)

    def test_nr_nucleotide(self):
        """This is exercising a NonReversibleNucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
        ]
        sm = NonReversibleNucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A"])

    def test_nr_dinucleotide(self):
        """This is exercising a NonReversibleDinucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CG", "TG", forward_only=True),
        ]
        sm = NonReversibleDinucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CG>TG"])

    def test_nr_trinucleotide(self):
        """This is exercising a NonReversibleTrinucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CGA", "TGA", forward_only=True),
        ]
        sm = NonReversibleTrinucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CGA>TGA"])
        self.assertEqual(len(sm.get_motifs()), 64)

    def test_nr_codon(self):
        """This is exercising a NonReversibleCodon"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CG", "TG", forward_only=True),
            "replacement",
        ]
        sm = NonReversibleCodon(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CG>TG", "replacement"])

    def test_nr_protein(self):
        """This is exercising a NonReversibleProtein"""
        preds = [
            MotifChange("D", "K", forward_only=True),
            MotifChange("R", "V", forward_only=True),
        ]
        sm = NonReversibleProtein(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["D>K", "R>V"])