1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
|
# Copyright 2009 by Cymon J. Cox. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Command line wrapper for the multiple alignment program PROBCONS.
"""
from __future__ import print_function
from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline
class ProbconsCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program PROBCONS.

    http://probcons.stanford.edu/

    Example:
    --------

    To align a FASTA file (unaligned.fasta) with the output in ClustalW
    format, and otherwise default settings, use:

    >>> from Bio.Align.Applications import ProbconsCommandline
    >>> probcons_cline = ProbconsCommandline(input="unaligned.fasta",
    ...                                      clustalw=True)
    >>> print(probcons_cline)
    probcons -clustalw unaligned.fasta

    You would typically run the command line with probcons_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Note that PROBCONS will write the alignment to stdout, which you may
    want to save to a file and then parse, e.g.::

        stdout, stderr = probcons_cline()
        with open("aligned.aln", "w") as handle:
            handle.write(stdout)
        from Bio import AlignIO
        align = AlignIO.read("aligned.aln", "clustalw")

    Alternatively, to parse the output with AlignIO directly you can
    use StringIO to turn the string into a handle::

        stdout, stderr = probcons_cline()
        from StringIO import StringIO  # Python 3: from io import StringIO
        from Bio import AlignIO
        align = AlignIO.read(StringIO(stdout), "clustalw")

    Citations:
    ----------

    Do, C.B., Mahabhashyam, M.S.P., Brudno, M., and Batzoglou, S. 2005.
    PROBCONS: Probabilistic Consistency-based Multiple Sequence Alignment.
    Genome Research 15: 330-340.

    Last checked against version: 1.12
    """

    def __init__(self, cmd="probcons", **kwargs):
        """Declare the PROBCONS options and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable, "probcons" by default.
         - kwargs - initial option values keyed by any of the names listed
           for an option below (e.g. ``input="unaligned.fasta"``,
           ``clustalw=True``); they are forwarded unchanged to
           ``AbstractCommandline.__init__``.

        """
        # The parameter list must be in place before the base-class
        # initialiser runs, since the keyword arguments are handed to it.
        # NOTE(review): equate=False presumably makes the option and its
        # value separate words ("-c 2") rather than "-c=2" -- confirm
        # against Bio.Application._Option.
        self.parameters = [
            # Note that some options cannot be assigned via properties using
            # the original documented option (because hyphens are not valid
            # for names in python), e.g cmdline.pre-training = 3 will not
            # work. In these cases the shortened option name should be used:
            # cmdline.pre = 3
            _Switch(["-clustalw", "clustalw"],
                    "Use CLUSTALW output format instead of MFA"),
            _Option(["-c", "c", "--consistency", "consistency"],
                    "Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation",
                    # Only values equal to an integer in 0..5 are accepted.
                    checker_function=lambda x: x in range(0, 6),
                    equate=False),
            _Option(["-ir", "--iterative-refinement", "iterative-refinement", "ir"],
                    "Use 0 <= REPS <= 1000 (default: 100) passes of "
                    "iterative-refinement",
                    # Only values equal to an integer in 0..1000 are accepted.
                    checker_function=lambda x: x in range(0, 1001),
                    equate=False),
            _Option(["-pre", "--pre-training", "pre-training", "pre"],
                    "Use 0 <= REPS <= 20 (default: 0) rounds of pretraining",
                    # Only values equal to an integer in 0..20 are accepted.
                    checker_function=lambda x: x in range(0, 21),
                    equate=False),
            _Switch(["-pairs", "pairs"],
                    "Generate all-pairs pairwise alignments"),
            _Switch(["-viterbi", "viterbi"],
                    "Use Viterbi algorithm to generate all pairs "
                    "(automatically enables -pairs)"),
            _Switch(["-verbose", "verbose"],
                    "Report progress while aligning (default: off)"),
            _Option(["-annot", "annot"],
                    "Write annotation for multiple alignment to FILENAME",
                    equate=False),
            _Option(["-t", "t", "--train", "train"],
                    "Compute EM transition probabilities, store in FILENAME "
                    "(default: no training)",
                    equate=False),
            _Switch(["-e", "e", "--emissions", "emissions"],
                    "Also reestimate emission probabilities (default: off)"),
            _Option(["-p", "p", "--paramfile", "paramfile"],
                    "Read parameters from FILENAME",
                    equate=False),
            _Switch(["-a", "--alignment-order", "alignment-order", "a"],
                    "Print sequences in alignment order rather than input "
                    "order (default: off)"),
            # Input file name: declared last, so it follows all options on
            # the generated command line (see the doctest in the class
            # docstring).
            _Argument(["input"],
                      "Input file name. Must be multiple FASTA alignment "
                      "(MFA) format",
                      filename=True,
                      is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
if __name__ == "__main__":
    # Only when executed as a script: hand over to Biopython's doctest
    # helper. The import is kept local so that importing this module as a
    # library does not pull in the helper.
    from Bio._utils import run_doctest as _run_doctest

    _run_doctest()
|