1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
|
#!/usr/bin/env python
# File created on 09 Feb 2010
from __future__ import division
__author__ = "Jeremy Widmann"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Jeremy Widmann","Rob Knight","Jesse Stombaugh"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Jeremy Widmann"
__email__ = "Jeremy.Widmann@colorado.edu"
__status__ = "Release"
from qiime.util import parse_command_line_parameters, get_qiime_project_dir,\
get_options_lookup
from qiime.util import make_option,get_interesting_mapping_fields
from qiime.make_distance_histograms import group_distances, \
draw_all_histograms, _make_relative_paths, make_main_html, \
monte_carlo_group_distances, monte_carlo_group_distances_within_between
from cogent.util.misc import get_random_directory_name
from qiime.colors import sample_color_prefs_and_map_data_from_options,\
iter_color_groups
from qiime.parse import (parse_mapping_file, parse_distmat,
parse_prefs_file, QiimeParseError)
from os import mkdir,path
from string import strip
options_lookup = get_options_lookup()
script_info={}
script_info['brief_description']="""Make distance histograms"""
script_info['script_description']="""To visualize the distance between samples and/or categories in the metadata mapping file, the user can generate histograms to represent the distances between samples. This script generates an HTML file, where the user can compare the distances between samples based on the different categories associated to each sample in the metadata mapping file. """
script_info['script_usage']=[]
script_info['script_usage'].append(("""Examples:""","""Distance Histograms are a way to compare different categories and see which tend to have larger/smaller distances than others. For example, in the hand study, you may want to compare the distances between hands to the distances between individuals (with the file "hand_distances.txt" using the parameter -d hand_distances.txt). The categories are defined in the metadata mapping file (specified using the parameter -m hand_map.txt). If you want to look at the distances between hands and individuals, choose the "Hand" field and "Individual" field (using the parameter --fields Hand,Individual (notice the fields are comma delimited)). For each of these groups of distances a histogram is made. The output is a HTML file which is created in the "Distance_Histograms" directory (using the parameter -o Distance_Histograms to specify output directory) where you can look at all the distance histograms individually, and compare them between each other.
In the following command, the user only supplies a distance matrix (i.e. resulting file from beta_diversity.py), the user-generated metadata mapping file and one category (e.g. pH):""","""make_distance_histograms.py -d beta_div.txt -m Mapping_file.txt --fields pH"""))
script_info['script_usage'].append(("""""","""For comparison of multiple categories (e.g. pH, salinity), you can use the following command:""","""make_distance_histograms.py -d beta_div.txt -m Mapping_file.txt --fields pH,salinity"""))
script_info['script_usage'].append(("""""","""HTML output is automatically generated. If the user would like to suppress the HTML output, you can use the following command:""","""make_distance_histograms.py -d beta_div.txt -m Mapping_file.txt --fields pH --suppress_html_output"""))
script_info['script_usage'].append(("""""","""In the case that the user generates their own preferences file (prefs.txt), they can use the following command:""","""make_distance_histograms.py -d beta_div.txt -m Mapping_file.txt -p prefs.txt"""))
script_info['script_usage'].append(("""""","""Note: In the case that a preferences file is passed, the user does not need to supply fields in the command-line.""",""""""))
script_info['output_description']="""The result of this script will be a folder containing images and/or an html file (with appropriate javascript files), depending on the user-defined parameters."""
script_info['required_options']=[\
make_option('-d','--distance_matrix_file',
help='Input distance matrix filepath (i.e. the result of' +\
' beta_diversity.py).',
type='existing_filepath'),\
make_option('-m', '--map_fname', dest='map_fname', \
help='Input metadata mapping filepath.',
type='existing_filepath'), \
]
script_info['optional_options']=[\
make_option('-p', '--prefs_path',
help='Input user-generated preferences filepath. NOTE: This is a' +\
' file with a dictionary containing preferences for the analysis.' +\
' This dictionary must have a "Fields" key mapping to a list of' +\
' desired fields. [default: %default]',
type='existing_filepath'),
make_option('-o', '--dir_path',
default='./',help='Output directory. [default: %default]',
type='new_dirpath'),\
make_option('-k', '--background_color', dest='background_color',\
default='white', type='choice',choices=['black','white'],
help='Background color for use in the plots' +\
' (black or white) [default: %default]'),
make_option('--monte_carlo',dest='monte_carlo',default=None,\
action='store_true',
help='Deprecated: pass --monte_carlo_iters > 0 to enable'),\
make_option('--suppress_html_output',dest='suppress_html_output',\
default=False,action='store_true',
help='Suppress HTML output. [default: %default]'),\
make_option('-f','--fields', default=None,
help='Comma-separated list of fields to compare, where the list of' +\
' fields should be in quotes (e.g. "Field1,Field2,Field3").' +\
' Note: if this option is passed on the' +\
' command-line, it will overwrite the fields in prefs file.'+\
' [default:%default; first field in mapping file is used]'),\
make_option('--monte_carlo_iters', dest='monte_carlo_iters',type="int",\
default=0,
help='Number of iterations to perform for Monte Carlo analysis.' +\
' [default: %default; No monte carlo simulation performed]'),\
]
script_info['option_label']={'distance_matrix_file':'Distance matrix filepath',
'map_fname':'QIIME-formatted mapping filepath',
'prefs_path': 'Preferences filepath',
'dir_path': 'Output directory',
'background_color': 'Background color',
'monte_carlo': 'Perform Monte Carlo',
'monte_carlo_iters':'# of Monte Carlo iterations',
'suppress_html_output': 'Suppress HTML',
'fields':'Categories to compare'}
script_info['version'] = __version__
def main():
option_parser, opts, args = parse_command_line_parameters(**script_info)
#Some code for error checking of input args:
#Check if distance_matrix_file is valid:
try:
d_header, d_mat = parse_distmat(open(opts.distance_matrix_file,'U'))
except:
option_parser.error("This does not look like a valid distance matrix file. Please supply a valid distance matrix file using the -d option.")
#Check if map_fname is valid:
try:
mapping, m_header, m_comments = \
parse_mapping_file(open(opts.map_fname,'U'))
except QiimeParseError:
option_parser.error("This does not look like a valid metadata mapping file. Please supply a valid mapping file using the -m option.")
#make sure background_color is valid
if opts.background_color not in ['black','white']:
option_parser.error("'%s' is not a valid background color. Please pass in either 'black' or 'white' using the -k option."%(opts.background_color))
#make sure prefs file is valid if it exists
if opts.prefs_path is not None:
try:
prefs_file = open(opts.prefs_path, 'U').read()
except IOError:
option_parser.error("Provided prefs file, '%s', does not exist. Please pass in a valid prefs file with the -p option."%(opts.prefs_path))
if opts.prefs_path is not None:
prefs = parse_prefs_file(prefs_file)
else:
prefs=None
color_prefs, color_data, background_color, label_color, ball_scale,\
arrow_colors=sample_color_prefs_and_map_data_from_options(opts)
#list of labelname, groups, colors, data_colors, data_color_order
groups_and_colors=list(iter_color_groups(mapping=color_data['map'],\
prefs=color_prefs))
#dict mapping labelname to list of: [groups, colors, data_colors,
# data_color_order]
field_to_colors = {}
for color_info in groups_and_colors:
field_to_colors[color_info[0]]=color_info[1:]
qiime_dir = get_qiime_project_dir()+'/qiime/support_files/'
fields = opts.fields
if fields is not None:
fields = map(strip,fields.split(','))
fields = [i.strip('"').strip("'") for i in fields]
elif prefs is not None:
fields = prefs.get('FIELDS',None)
else:
fields = get_interesting_mapping_fields(mapping, m_header)
#Check that all provided fields are valid:
if fields is not None:
for f in fields:
if f not in m_header:
option_parser.error("The field, %s, is not in the provided mapping file. Please supply correct fields (using the -f option or providing a 'FIELDS' list in the prefs file) corresponding to fields in mapping file."%(f))
within_distances, between_distances, dmat = \
group_distances(mapping_file=opts.map_fname,\
dmatrix_file=opts.distance_matrix_file,\
fields=fields,\
dir_prefix=get_random_directory_name(output_dir=opts.dir_path,\
prefix='distances'))
if not opts.suppress_html_output:
#histograms output path
histograms_path = path.join(opts.dir_path,'histograms')
try:
mkdir(histograms_path)
except OSError: #raised if dir exists
pass
#draw all histograms
distances_dict, label_to_histogram_filename = \
draw_all_histograms(single_field=within_distances, \
paired_field=between_distances, \
dmat=dmat,\
histogram_dir=histograms_path,\
field_to_color_prefs=field_to_colors,\
background_color=background_color)
#Get relative path to histogram files.
label_to_histogram_filename_relative = \
_make_relative_paths(label_to_histogram_filename, opts.dir_path)
dm_fname=path.split(opts.distance_matrix_file)[-1]
basename=path.splitext(dm_fname)[0]
outfile_name = basename+'_distance_histograms.html'
make_main_html(distances_dict=distances_dict,\
label_to_histogram_filename=label_to_histogram_filename_relative,\
root_outdir=opts.dir_path, \
outfile_name = outfile_name, \
title='Distance Histograms')
#Handle saving web resources locally.
#javascript file
javascript_path = path.join(opts.dir_path,'js')
try:
mkdir(javascript_path)
except OSError: #raised if dir exists
pass
js_out = open(javascript_path+'/histograms.js','w')
js_out.write(open(qiime_dir+'js/histograms.js').read())
js_out.close()
monte_carlo_iters = opts.monte_carlo_iters
if monte_carlo_iters > 0:
#Do Monte Carlo for all fields
monte_carlo_group_distances(mapping_file=opts.map_fname,\
dmatrix_file=opts.distance_matrix_file,\
prefs=prefs, \
dir_prefix = opts.dir_path,\
fields=fields,\
default_iters=monte_carlo_iters)
#Do Monte Carlo for within and between fields
monte_carlo_group_distances_within_between(\
single_field=within_distances,\
paired_field=between_distances, dmat=dmat, \
dir_prefix = opts.dir_path,\
num_iters=monte_carlo_iters)
if __name__ == "__main__":
main()
|