1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
|
from ete3 import PhyloTree
# Reads a phylogenetic tree (using default species name encoding)
t = PhyloTree("(((Hsa_001,Ptr_001),(Cfa_001,Mms_001)),(Dme_001,Dme_002));")
# /-Hsa_001
# /--------|
# | \-Ptr_001
# /--------|
# | | /-Cfa_001
# | \--------|
#---------| \-Mms_001
# |
# | /-Dme_001
# \--------|
# \-Dme_002
#
# Prints current leaf names and species codes
print("Deafult mode:")
for n in t.get_leaves():
print("node:", n.name, "Species name:", n.species)
# node: Dme_001 Species name: Dme
# node: Dme_002 Species name: Dme
# node: Hsa_001 Species name: Hsa
# node: Ptr_001 Species name: Ptr
# node: Cfa_001 Species name: Cfa
# node: Mms_001 Species name: Mms
#
# We can also use our own leaf name parsing function to obtain species
# names. All we need to do is create a python function that takes
# node's name as argument and return its corresponding species name.
def get_species_name(node_name_string):
# Species code is the first part of leaf name (separated by an
# underscore character)
spcode = node_name_string.split("_")[0]
# We could even translate the code to complete names
code2name = {
"Dme":"Drosophila melanogaster",
"Hsa":"Homo sapiens",
"Ptr":"Pan troglodytes",
"Mms":"Mus musculus",
"Cfa":"Canis familiaris"
}
return code2name[spcode]
# Now, let's ask the tree to use our custom species naming function
t.set_species_naming_function(get_species_name)
print("Custom mode:")
for n in t.get_leaves():
print("node:", n.name, "Species name:", n.species)
# node: Dme_001 Species name: Drosophila melanogaster
# node: Dme_002 Species name: Drosophila melanogaster
# node: Hsa_001 Species name: Homo sapiens
# node: Ptr_001 Species name: Pan troglodytes
# node: Cfa_001 Species name: Canis familiaris
# node: Mms_001 Species name: Mus musculus
#
# Of course, you can disable the automatic generation of species
# names. To do so, you can set the species naming function to
# None. This is useful to set the species names manually or for
# reading them from a newick file. Other wise, species attribute would
# be overwriten
mynewick = """
(((Hsa_001[&&NHX:species=Human],Ptr_001[&&NHX:species=Chimp]),
(Cfa_001[&&NHX:species=Dog],Mms_001[&&NHX:species=Mouse])),
(Dme_001[&&NHX:species=Fly],Dme_002[&&NHX:species=Fly]));
"""
t = PhyloTree(mynewick, sp_naming_function=None)
print("Disabled mode (manual set):")
for n in t.get_leaves():
print("node:", n.name, "Species name:", n.species)
# node: Dme_001 Species name: Fly
# node: Dme_002 Species name: Fly
# node: Hsa_001 Species name: Human
# node: Ptr_001 Species name: Chimp
# node: Cfa_001 Species name: Dog
# node: Mms_001 Species name: Mouse
#
# Of course, once this info is available you can query any internal
# node for species covered.
human_mouse_ancestor = t.get_common_ancestor("Hsa_001", "Mms_001")
print("These are the species under the common ancestor of Human & Mouse")
print('\n'.join( human_mouse_ancestor.get_species() ))
# Mouse
# Chimp
# Dog
# Human
|