File: ebola.py

package info (click to toggle)
python-treetime 0.11.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 21,316 kB
  • sloc: python: 8,437; sh: 124; makefile: 49
file content (82 lines) | stat: -rw-r--r-- 2,780 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
'''
this script illustrates the use of treetime to analyze viral sequences.
As an example, we use Ebola virus sequences from the 2014-2015 outbreak in
West Africa. This example contains more than 300 sequences, it will take
a few minutes to run.
'''

from __future__ import print_function, division
import sys
import numpy as np
from Bio import Phylo

import matplotlib.pyplot as plt
from matplotlib import cm
try:
    import seaborn as sns
    sns.set_style('whitegrid')
except:
    print ("Seaborn not found. Default style will be used for the plots")

from treetime import TreeTime
from treetime.utils import parse_dates
import treetime.config as ttconf

if __name__ == '__main__':


    plt.ion()
    base_name = '../data/ebola/ebola'
    dates = parse_dates(base_name+'.metadata.csv')
    # instantiate treetime
    ebola = TreeTime(gtr='Jukes-Cantor', tree = base_name+'.nwk', precision=1,
                     aln = base_name+'.fasta', verbose = 2, dates = dates)

    # infer an ebola time tree while rerooting and resolving polytomies
    res = ebola.run(root='best', infer_gtr=True, relaxed_clock=False, max_iter=2,
                    branch_length_mode='input', n_iqd=3, resolve_polytomies=True,
                    Tc='skyline', time_marginal="assign", vary_rate=True)

    if res==ttconf.ERROR:
        sys.exit()

    # scatter root to tip divergence vs sampling date
    ebola.plot_root_to_tip(add_internal=True)
    plt.legend(loc=2)

    # rescale branch length to years and plot in axis 0
    from treetime.treetime import plot_vs_years
    fig, axs = plt.subplots(1,2, sharey=True, figsize=(12,8))
    plot_vs_years(ebola, step=1, ax=axs[1], confidence=(0.05,0.95), label_func = lambda x:"")
    axs[1].set_xlim(0, 2.5)
    axs[1].set_title("time tree")

    # assign mutation length to branch length and plot the mutation tree in axs[1]
    axs[0].set_title("mutation tree")
    for n in ebola.tree.find_clades():
        n.branch_length=n.mutation_length
    Phylo.draw(ebola.tree, label_func=lambda x:"", axes=axs[0])
    plt.tight_layout()

    # reset branch length to time (in substitution rate units)
    for n in ebola.tree.find_clades():
        n.branch_length=n.clock_length

    # OUTPUT the GTR model
    print(ebola.gtr)

    # plot Skyline
    skyline, confidence = ebola.merger_model.skyline_inferred(gen=50, confidence=2.0)
    skyline_empirical = ebola.merger_model.skyline_empirical(gen=50)
    plt.figure()
    plt.fill_between(skyline.x, confidence[0], confidence[1], color=(0.8, 0.8, 0.8))
    plt.plot(skyline.x, skyline.y, label='maximum likelihood skyline')
    plt.plot(skyline_empirical.x, skyline_empirical.y, label='empirical skyline')
    plt.yscale('log')
    plt.legend()
    plt.ticklabel_format(axis='x',useOffset=False)