1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
|
.. jupyter-execute::
:hide-code:
import set_working_directory
.. _calculating-pairwise-distances:
Calculate pairwise distances between sequences
==============================================
.. sectionauthor:: Gavin Huttley
An example of how to calculate the pairwise distances for a set of sequences.
.. jupyter-execute::
from cogent3 import load_aligned_seqs
from cogent3.evolve import distance
Import a substitution model (or create your own)
.. jupyter-execute::
from cogent3.evolve.models import HKY85
Load the alignment
.. jupyter-execute::
al = load_aligned_seqs("data/long_testseqs.fasta")
Create a pairwise distances object with your alignment and substitution model and run it.
.. jupyter-execute::
d = distance.EstimateDistances(al, submodel=HKY85())
d.run(show_progress=False)
d.get_pairwise_distances()
Note that pairwise distance calculations can be distributed across multiple CPUs. In this case, when statistics (like distances) are requested, only the master CPU returns data.
We'll write a PHYLIP-formatted distance matrix.
.. jupyter-execute::
d.write("dists_for_phylo.phylip", format="phylip")
.. todo:: write out in json format
We'll also save the distances to a file in Python's pickle format.
.. jupyter-execute::
import pickle
with open("dists_for_phylo.pickle", "wb") as f:
pickle.dump(d.get_pairwise_distances(), f)
.. clean up
.. jupyter-execute::
:hide-code:
import os
for file_name in "dists_for_phylo.phylip", "dists_for_phylo.pickle":
os.remove(file_name)
|