1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
|
# Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.
# Revisions copyright 2014 by Melissa Gymrek <mgymrek@mit.edu>.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""This module allows you to control Simcoal2 and FastSimcoal (DEPRECATED)."""
import os
import sys
from Bio.Application import AbstractCommandline, _Option, _Switch
class SimCoalController(object):
def __init__(self, simcoal_dir):
"""Initializes the controller. (DEPRECATED)
simcoal_dir is the directory where simcoal is.
The initializer checks for existence and executability of binaries.
"""
self.simcoal_dir = simcoal_dir
self.os_name = os.name # remove this?
dir_contents = os.listdir(self.simcoal_dir)
# We expect the tool to be installed as simcoal2(.exe)
# without any trailing version number.
self.bin_name = "simcoal2"
if self.bin_name not in dir_contents:
# Try case insensitive,
dir_contents = [x.lower() for x in dir_contents]
if self.bin_name not in dir_contents:
# Try with .exe
self.bin_name += '.exe'
if self.bin_name not in dir_contents:
raise IOError("SimCoal not available")
if not os.access(os.path.join(self.simcoal_dir, self.bin_name),
os.X_OK):
raise IOError("SimCoal not executable")
def run_simcoal(self, par_file, num_sims, ploydi='1', par_dir='.'):
"""Executes SimCoal.
"""
if par_dir is None:
par_dir = os.sep.join([".", 'SimCoal', 'runs'])
curr_dir = os.getcwd()
# TODO - Make sure we change drive on Windows as well?
os.chdir(par_dir)
exe = os.path.join(self.simcoal_dir, self.bin_name)
if " " in exe:
exe = '"' + exe + '"'
cmd = exe + ' ' + par_file + ' ' + str(num_sims) + ' ' + ploydi
# TODO - Better way to spot if on Jython on Windows?
if sys.platform == "win32" or self.bin_name.endswith(".exe"):
# There is no /dev/nul on Windows
cmd += ' > nul 2>nul'
else:
cmd += ' >/dev/null 2>&1'
os.system(cmd)
os.chdir(curr_dir)
class _FastSimCoalCommandLine(AbstractCommandline):
""" Command Line Wrapper for Fastsimcoal
"""
def __init__(self, fastsimcoal_dir=None, cmd='fastsimcoal', **kwargs):
self.parameters = [
_Option(["-i", "--ifile", "parfile"], "Name of the parameter file",
filename=True, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, str)),
_Option(["-n", "--numsims", "numsims"], "Number of simulations to perform",
filename=False, equate=False, is_required=True,
checker_function=lambda x: isinstance(x, int)),
_Option(["-t", "--tfile", "tfile"], "Name of template parameter file",
filename=True, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, str)),
_Option(["-f", "--dfile", "dfile"], "Name of parameter definition file",
filename=True, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, str)),
_Option(["-F", "--dFile", "dFile"],
"""Same as -f but only uses simple parameters defined
in the template file. Complex params are recomputed""",
filename=True, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, str)),
_Option(["-e", "--efile", "efile"],
"""Parameter prior definition file.
Parameters drawn from specified distributions are
substituted into template file.""",
filename=True, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, str)),
_Option(["-E", "--numest", "numest"],
"""Number of estimations from parameter priors.
Listed parameter values are substituted in template file.""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Switch(["-g", "--genotypic", "genotypic"], "Generates Arlequin projects with genotypic data"),
_Switch(["-p", "--phased", "phased"], "Specifies that phase is known in Arlequin output"),
_Option(["-s", "--dnatosnp", "dnatosnp"],
""""Output DNA as SNP data (0: ancestral, 1: derived
and specify maximum no. SNPs to output.""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Switch(["-S", "--allsites", "allsites"],
"""Output the whole DNA sequence, including monomorphic sites"""),
_Switch(["-I", "--inf", "inf"],
"""Generates DNA mutations according to an
infinite sites (IS) mutation model."""),
_Switch(["-d", "--dsfs", "dsfs"], "Computes derived site frequency spectrum"),
_Switch(["-m", "--msfs", "msfs"], "Computes minor site frequency spectrum"),
_Option(["-o", "--oname", "oname"], "Generic name for observed SFS files",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, str)),
_Switch(["-H", "--header", "header"], "Generates header in site frequency spectrum files."),
_Switch(["-q", "--quiet", "quiet"], "Minimal messages output to console"),
_Switch(["-T", "--tree", "tree"], "Output coalescent tree in nexus format."),
_Option(["-k", "--keep", "keep"],
"""Number of simulated polymorphic sites kept in memory.
If the simulated no. is larger, then temporary files are created.""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Option(["--seed", "seed"], "Seed for the random number generator (positive int <=1E6)",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Switch(["-x", "--noarloutput", "noarloutput"], "Does not generate Arlequin output"),
_Switch(["-D", "--dadioutput", "dadioutput"], "Output SFS in dadi format"),
_Option(["-M", "--maxlhood", "maxlhood"],
"""Perform parameter estimation by max lhood from SFS, and
define stop criterion as min., rel., diff. in parameter
values between iterations""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, float)),
_Option(["-N", "--maxnumsims", "maxnumsims"],
"""Maximum number of simulations to perform during
likelihood maximization.""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Option(["-l", "--minnumloops", "minnumloops"],
"""Minimum number of iteration loops to perform during
likelihood maximization.""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Option(["-L", "--maxnumloops", "maxnumloops"],
"""Maximum number of iterations to perform during
likelihood maximization""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Option(["-C", "--minSFSCount", "minSFSCount"],
"""Minimum observed SFS entry count taken into account
in likelihood computation""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Switch(["-0", "--removeZeroSFS", "removeZeroSFS"],
"""Do not take into account monomorphic sites for
SFS likelihood computation."""),
_Option(["-a", "--ascDeme", "ascDeme"],
"""This is the deme id where ascertainment is performed
when simulating SNPs.""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Option(["-A", "--ascSize", "ascSize"],
"""Number of ascertained chromosomes used to define SNPs in
a given deme.""",
filename=False, equate=False, is_required=False,
checker_function=lambda x: isinstance(x, int)),
_Switch(["-u", "--multiSFS", "multiSFS"],
"Generate or use multidimensional SFS")]
AbstractCommandline.__init__(self, cmd, **kwargs)
class FastSimCoalController(object):
def __init__(self, fastsimcoal_dir=None, bin_name="fsc252"):
"""Initializes the controller.
fastsimcoal_dir is the directory where fastsimcoal is.
By default the binary should be called fsc252.
bin_name specifies a different name for the binary.
The initializer checks for existence and executability of binaries
and sets up the command line controller.
Fastsimcoal2 is available here: http://cmpg.unibe.ch/software/fastsimcoal2/.
This wrapper was written and tested for fastsimcoal version 2.51.
"""
self.bin_name = bin_name
self.fastsimcoal_dir = fastsimcoal_dir
if fastsimcoal_dir is None:
for path in os.environ["PATH"].split(os.pathsep):
if os.path.isfile(os.path.join(path, self.bin_name)):
self.fastsimcoal_dir = path
if self.fastsimcoal_dir is None:
raise IOError("Fastsimcoal not available")
else:
dir_contents = os.listdir(fastsimcoal_dir)
if self.bin_name not in dir_contents:
raise IOError("Fastsimcoal not available")
if not os.access(os.path.join(self.fastsimcoal_dir, self.bin_name), os.X_OK):
raise IOError("Fastsimcoal not executable")
def run_fastsimcoal(self, par_file, num_sims, par_dir='.', opts=None):
"""Executes Fastsimcoal.
par_file is the input parameter file (--ifile) for fastsimcoal.
num_sims is the number of simulations to perform.
par_dir is the directory where par_file is and where output will be written.
opts is a dictionary of additional options to fastsimcoal.
"""
if opts is None:
opts = {}
if par_dir is None:
par_dir = os.sep.join([".", "Fastsimcoal", "runs"])
if not os.path.exists(par_dir):
os.mkdir(par_dir)
curr_dir = os.getcwd()
os.chdir(par_dir)
if par_file is None: # Must use .tpl for -t instead if no par_file
controller = _FastSimCoalCommandLine(cmd=os.path.join(self.fastsimcoal_dir, self.bin_name),
numsims=num_sims, **opts)
else:
controller = _FastSimCoalCommandLine(cmd=os.path.join(self.fastsimcoal_dir, self.bin_name),
parfile=par_file, numsims=num_sims, **opts)
controller()
os.chdir(curr_dir)
|