1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
#!/usr/bin/env python
# File created on 13 Jul 2012
from __future__ import division
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso", "Jose Antonio Navas Molina", "Emily TerAvest"]
__license__ = "GPL"
__version__ = "1.8.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
from glob import glob
from os.path import isfile
from qiime.util import parse_command_line_parameters
from qiime.util import make_option
from qiime.beta_diversity import get_phylogenetic_metric
from qiime.beta_diversity import list_known_metrics
from qiime.util import load_qiime_config, get_options_lookup
from qiime.parallel.beta_diversity import (ParallelBetaDiversitySingle,
ParallelBetaDiversityMultiple)
qiime_config = load_qiime_config()
options_lookup = get_options_lookup()
script_info={}
script_info['brief_description']="""Parallel beta diversity"""
script_info['script_description']="""This script performs like the\
beta_diversity.py script, but is intended to make use of\
multicore/multiprocessor environments to perform analyses in parallel."""
script_info['script_usage']=[]
script_info['script_usage'].append(
("""Apply beta_diversity.py in parallel to multiple otu tables""",
"""Apply the unweighted_unifrac and weighted_unifrac metrics (modify with -m)\
to all otu tables in rarefied_otu_tables (-i) and write the resulting output\
files to bdiv/ (-o, will be created if it doesn't exist). Use the rep_set.tre\
(-t) to compute phylogenetic diversity metrics. ALWAYS SPECIFY ABSOLUTE FILE\
PATHS (absolute path represented here as $PWD, but will generally look\
something like /home/ubuntu/my_analysis/).""",
"""%prog -i $PWD/rarefied_otu_tables/ -o $PWD/bdiv/ -t $PWD/rep_set.tre"""))
script_info['script_usage'].append(
("""Apply beta_diversity.py in parallel to a single otu table""",""" """,
"""%prog -i $PWD/otu_table.biom -o $PWD/bdiv_single/ -t $PWD/rep_set.tre"""))
script_info['output_description']="""The output of %prog is a folder containing\
text files, each a distance matrix between samples."""
script_info['required_options'] = [\
make_option('-i', '--input_path',type='existing_path',
help='input path, must be directory [REQUIRED]'),\
make_option('-o', '--output_path',type='new_dirpath',
help='output path, must be directory [REQUIRED]'),
]
script_info['optional_options'] = [
make_option('-m', '--metrics', default='unweighted_unifrac,weighted_unifrac',
type='multiple_choice', mchoices=list_known_metrics(),
help='Beta-diversity metric(s) to use. A comma-separated list should be' +\
' provided when multiple metrics are specified. [default: %default]'),
make_option('-t', '--tree_path', type='existing_filepath',
help='path to newick tree file, required for phylogenetic metrics'+\
' [default: %default]'),\
options_lookup['retain_temp_files'],
options_lookup['suppress_submit_jobs'],
options_lookup['poll_directly'],
options_lookup['cluster_jobs_fp'],
options_lookup['suppress_polling'],
options_lookup['job_prefix'],
options_lookup['seconds_to_sleep'],
options_lookup['jobs_to_start'],
make_option('-f', '--full_tree', action="store_true",
help='By default, each job removes calls _fast_unifrac_setup to remove\
unused parts of the tree. pass -f if you already have a minimal tree, and\
this script will run faster'),
]
script_info['version'] = __version__
def main():
option_parser, opts, args = parse_command_line_parameters(**script_info)
params = eval(str(opts))
params['metrics'] = ','.join(opts.metrics)
# create local copies of command-line options
input_path = opts.input_path
output_dir = opts.output_path
metrics_list = opts.metrics
tree_fp = opts.tree_path
# Check the tree exists if phylogenetically-aware measure is used
for metric in metrics_list:
try:
metric_f = get_phylogenetic_metric(metric)
if tree_fp == None:
option_parser.error("metric %s requires a tree, but "
"none found" % metric)
except AttributeError:
pass
if isfile(input_path):
# single otu table mode
parallel_runner = ParallelBetaDiversitySingle(
cluster_jobs_fp=opts.cluster_jobs_fp,
jobs_to_start=opts.jobs_to_start,
retain_temp_files=opts.retain_temp_files,
suppress_polling=opts.suppress_polling,
seconds_to_sleep=opts.seconds_to_sleep)
parallel_runner(input_path,
output_dir,
params,
job_prefix=opts.job_prefix,
poll_directly=opts.poll_directly,
suppress_submit_jobs=False)
else:
input_fps = glob('%s/*' % input_path)
parallel_runner = ParallelBetaDiversityMultiple(
cluster_jobs_fp=opts.cluster_jobs_fp,
jobs_to_start=opts.jobs_to_start,
retain_temp_files=opts.retain_temp_files,
suppress_polling=opts.suppress_polling,
seconds_to_sleep=opts.seconds_to_sleep)
parallel_runner(input_fps,
output_dir,
params,
job_prefix=opts.job_prefix,
poll_directly=opts.poll_directly,
suppress_submit_jobs=False)
if __name__ == "__main__":
main()
|