1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
|
.. jupyter-execute::
:hide-code:
import set_working_directory
Select `n` sequences from a collection
--------------------------------------
Let's load an alignment of primate sequences to use in the examples.
.. jupyter-execute::
:raises:
from cogent3 import get_app
loader = get_app("load_aligned", moltype="dna")
aln = loader("data/primate_brca1.fasta")
aln
Select the first `n` sequences from an alignment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Initialising ``take_n_seqs`` with the argument ``number=3`` creates an app that returns the first 3 sequences from an alignment.
.. note:: "first n" refers to the order in which the sequences appear in the FASTA file.
.. jupyter-execute::
:raises:
from cogent3 import get_app
first_3 = get_app("take_n_seqs", number=3)
first_3(aln)
Randomly selecting `n` sequences from an alignment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Using ``random=True`` and ``number=3`` returns 3 randomly chosen sequences. The optional ``seed`` argument can be provided to ensure the same sequences are returned each time the app is called.
.. jupyter-execute::
:raises:
from cogent3 import get_app
random_n = get_app("take_n_seqs", random=True, number=3, seed=1)
random_n(aln)
Selecting the same sequences from multiple alignments
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Providing the argument ``fixed_choice=True`` ensures that sequences with the same names are returned when (randomly) sampling sequences from several alignments.
.. jupyter-execute::
:raises:
from cogent3 import get_app
loader = get_app("load_aligned", moltype="dna")
aln1 = loader("data/primate_brca1.fasta")
aln2 = loader("data/brca1.fasta")
aln1.names
.. jupyter-execute::
:raises:
aln2.names
.. jupyter-execute::
:raises:
fixed_choice = get_app("take_n_seqs", number=2, random=True, fixed_choice=True)
result1 = fixed_choice(aln1).names
result2 = fixed_choice(aln2).names
result1 == result2
|