File: baseml.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 46,860 kB
  • ctags: 13,237
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (192 lines) | stat: -rw-r--r-- 8,468 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

import os
import os.path
from ._paml import Paml, _relpath
from . import _parse_baseml


class BasemlError(EnvironmentError):
    """BASEML has failed. Run with verbose = True to view BASEML's error
    message"""


class Baseml(Paml):
    """This class implements an interface to BASEML, part of the PAML package."""

    def __init__(self, alignment=None, tree=None, working_dir=None,
                out_file=None):
        """Initialize the Baseml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None:
            if not os.path.exists(tree):
                raise IOError("The specified tree file does not exist.")
        self.tree = tree
        self.ctl_file = "baseml.ctl"
        self._options = {"noisy": None,
                         "verbose": None,
                         "runmode": None,
                         "model": None,
                         "model_options": None,
                         "Mgene": None,
                         "ndata": None,
                         "clock": None,
                         "fix_kappa": None,
                         "kappa": None,
                         "fix_alpha": None,
                         "alpha": None,
                         "Malpha": None,
                         "ncatG": None,
                         "fix_rho": None,
                         "rho": None,
                         "nparK": None,
                         "nhomo": None,
                         "getSE": None,
                         "RateAncestor": None,
                         "Small_Diff": None,
                         "cleandata": None,
                         "icode": None,
                         "fix_blength": None,
                         "method": None}

    def write_ctl_file(self):
        """Dynamically build a BASEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the baseml class.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        if True:  # Dummy statement to preserve indentation for diff
            with open(self.ctl_file, 'w') as ctl_handle:
                ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
                ctl_handle.write("outfile = %s\n" % self._rel_out_file)
                ctl_handle.write("treefile = %s\n" % self._rel_tree)
                for option in self._options.items():
                    if option[1] is None:
                        # If an option has a value of None, there's no need
                        # to write it in the control file; it's normally just
                        # commented out.
                        continue
                    if option[0] == "model_options":
                        continue
                    # If "model" is 9 or 10, it may be followed in the cotnrol
                    # file by further options such as
                    # [1 (TC CT AG GA)]
                    # or
                    # [5 (AC CA) (AG GA) (AT TA) (CG GC) (CT TC)]
                    # which are to be stored in "model_options" as a string.
                    if option[0] == "model" and option[1] in [9, 10]:
                        if self._options["model_options"] is not None:
                            ctl_handle.write("model = %s  %s" % (option[1],
                                            self._options["model_options"]))
                            continue
                    ctl_handle.write("%s = %s\n" % (option[0], option[1]))

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Baseml instance.
        """
        temp_options = {}
        if not os.path.isfile(ctl_file):
            raise IOError("File not found: %r" % ctl_file)
        else:
            with open(ctl_file) as ctl_handle:
                for line in ctl_handle:
                    line = line.strip()
                    uncommented = line.split("*", 1)[0]
                    if uncommented != "":
                        if "=" not in uncommented:
                            raise AttributeError(
                                "Malformed line in control file:\n%r" % line)
                        (option, value) = uncommented.split("=")
                        option = option.strip()
                        value = value.strip()
                        if option == "seqfile":
                            self.alignment = value
                        elif option == "treefile":
                            self.tree = value
                        elif option == "outfile":
                            self.out_file = value
                        elif option not in self._options:
                            raise KeyError("Invalid option: %s" % option)
                        elif option == "model":
                            if len(value) <= 2 and value.isdigit():
                                temp_options["model"] = int(value)
                                temp_options["model_options"] = None
                            else:
                                model_num = value.partition(" ")[0]
                                model_opt = value.partition(" ")[2].strip()
                                temp_options["model"] = int(model_num)
                                temp_options["model_options"] = model_opt
                        else:
                            if "." in value or "e-" in value:
                                try:
                                    converted_value = float(value)
                                except:
                                    converted_value = value
                            else:
                                try:
                                    converted_value = int(value)
                                except:
                                    converted_value = value
                            temp_options[option] = converted_value
        for option in self._options:
            if option in temp_options:
                self._options[option] = temp_options[option]
            else:
                self._options[option] = None

    def _set_rel_paths(self):
        """Convert all file/directory locations to paths relative to the current
        working directory.

        BASEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = _relpath(self.tree, self.working_dir)

    def run(self, ctl_file=None, verbose=False, command="baseml",
                parse=True):
        """Run baseml using the current configuration and then parse the results.

        Return a process signal so the user can determine if
        the execution was successful (return code 0 is successful, -N
        indicates a failure). The arguments may be passed as either
        absolute or relative paths, despite the fact that BASEML
        requires relative paths.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise IOError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            results = read(self.out_file)
        else:
            results = None
        return results


def read(results_file):
    """Parse a BASEML results file."""
    results = {}
    if not os.path.exists(results_file):
        raise IOError("Results file does not exist.")
    with open(results_file) as handle:
        lines = handle.readlines()
    (results, num_params) = _parse_baseml.parse_basics(lines, results)
    results = _parse_baseml.parse_parameters(lines, results, num_params)
    if results.get("version") is None:
        raise ValueError("Invalid results file")
    return results