File: _TCoffee.py

package info (click to toggle)
python-biopython 1.68+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 46,860 kB
  • ctags: 13,237
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (107 lines) | stat: -rw-r--r-- 4,570 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Copyright 2009 by Cymon J. Cox and Brad Chapman. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Command line wrapper for the multiple alignment program TCOFFEE."""

from __future__ import print_function

from Bio.Application import _Option, _Switch, AbstractCommandline


class TCoffeeCommandline(AbstractCommandline):
    """Commandline object for the TCoffee alignment program.

    http://www.tcoffee.org/Projects_home_page/t_coffee_home_page.html

    The T-Coffee command line tool has a lot of switches and options.
    This wrapper implements a VERY limited number of options - if you
    would like to help improve it please get in touch.

    Example:
    --------

    To align a FASTA file (unaligned.fasta) with the output in ClustalW
    format (file aligned.aln), and otherwise default settings, use:

    >>> from Bio.Align.Applications import TCoffeeCommandline
    >>> tcoffee_cline = TCoffeeCommandline(infile="unaligned.fasta",
    ...                                    output="clustalw",
    ...                                    outfile="aligned.aln")
    >>> print(tcoffee_cline)
    t_coffee -output clustalw -infile unaligned.fasta -outfile aligned.aln

    You would typically run the command line with tcoffee_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:
    ---------

    T-Coffee: A novel method for multiple sequence alignments.
    Notredame, Higgins, Heringa, JMB,302(205-217) 2000

    Last checked against: Version_6.92
    """

    # Values accepted by the "type" option; consulted by its checker below.
    SEQ_TYPES = ["dna", "protein", "dna_protein"]

    def __init__(self, cmd="t_coffee", **kwargs):
        """Declare the supported t_coffee options and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable; defaults to "t_coffee".
         - kwargs - option values keyed by the second name given in each
           parameter declaration below (e.g. infile="unaligned.fasta");
           they are forwarded unchanged to AbstractCommandline.__init__.
        """
        # NOTE: the order of this list matters - as the class doctest shows,
        # options are emitted in declaration order, not in the order the
        # keyword arguments were supplied.
        # NOTE(review): equate=False presumably makes the option and its
        # value space separated (cf. "-output clustalw" in the doctest)
        # rather than joined with "=" - confirm against Bio.Application.
        self.parameters = [
            _Option(["-output", "output"],
                    """Specify the output type.

                    One (or more separated by a comma) of:
                    'clustalw_aln', 'clustalw', 'gcg', 'msf_aln',
                    'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq'

                    Note that of these Biopython's AlignIO module will only
                    read clustalw, pir, and fasta.
                    """,  # TODO - Can we read the PHYLIP output?
                    equate=False),
            _Option(["-infile", "infile"],
                    "Specify the input file.",
                    filename=True,
                    is_required=True,
                    equate=False),
            # Indicates the name of the alignment output by t_coffee. If the
            # default is used, the alignment is named <your sequences>.aln
            _Option(["-outfile", "outfile"],
                    "Specify the output file. Default: <your sequences>.aln",
                    filename=True,
                    equate=False),
            _Switch(["-convert", "convert"],
                    "Specify you want to perform a file conversion"),
            _Option(["-type", "type"],
                    "Specify the type of sequence being aligned",
                    # Only the values listed in SEQ_TYPES are accepted.
                    checker_function=lambda x: x in self.SEQ_TYPES,
                    equate=False),
            # The adjacent literals are concatenated verbatim, so each piece
            # must carry its own separating punctuation/space.
            _Option(["-outorder", "outorder"],
                    "Specify the order of sequence to output. "
                    "Either 'input', 'aligned' or <filename> of "
                    "Fasta file with sequence order",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Specify the filename of the substitution matrix to use. "
                    "Default: blosum62mt",
                    equate=False),
            # Both gap penalties must be given as Python ints; the sign is
            # described in the help text but not enforced by the checker.
            _Option(["-gapopen", "gapopen"],
                    "Indicates the penalty applied for opening a gap "
                    "(negative integer)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-gapext", "gapext"],
                    "Indicates the penalty applied for extending a gap "
                    "(negative integer)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-quiet", "quiet"],
                    "Turn off log output"),
            _Option(["-mode", "mode"],
                    "Specifies a special mode: genome, quickaln, dali, 3dcoffee",
                    equate=False),
            ]
        # Explicit base-class call (rather than super()) keeps the module
        # usable on Python 2 as well, matching the __future__ import above.
        AbstractCommandline.__init__(self, cmd, **kwargs)


if __name__ == "__main__":
    # Executed directly as a script (not imported): hand over to Biopython's
    # doctest helper, which presumably runs the docstring examples in this
    # module.
    from Bio._utils import run_doctest as _run_module_doctests

    _run_module_doctests()