1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
#!/usr/bin/env python
# File created on 09 Feb 2010
from __future__ import division
__author__ = "Antonio Gonzalez Pena"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Antonio Gonzalez Pena"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Antonio Gonzalez Pena"
__email__ = "antgonza@gmail.com"
__status__ = "Release"
from qiime.util import parse_command_line_parameters, get_options_lookup
from optparse import make_option
from qiime.plot_semivariogram import fit_semivariogram, FitModel
from qiime.parse import parse_distmat
from qiime.filter import filter_samples_from_distance_matrix
from pylab import plot, xlabel, ylabel, title, savefig, ylim, xlim
from numpy import asarray
import os
from StringIO import StringIO
options_lookup = get_options_lookup()
script_info={}
script_info['brief_description']="""Fits a model between two distance matrices and plots the result"""
script_info['script_description']="""Fits a spatial autocorrelation model between two matrices and plots the result. This script will work with two distance matrices but will ignore the 0s at the diagonal and the values that go to N/A"""
script_info['script_usage']=[]
script_info['script_usage'].append(("""Fitting""","""For this script, the user supplies two distance matrices (i.e. resulting file from beta_diversity.py), along with the output filename (e.g. semivariogram), and the model to fit, as follows:""","""plot_semivariogram.py -x distance.txt -y unifrac.txt -m exponential -o semivariogram.png"""))
script_info['output_description']="""The resulting output file consists of a pdf image containing the plot between the two distances matrices and the fitted model"""
script_info['required_options']=[\
make_option('-x', '--input_path_x',\
help='path to distance matrix to be displayed in the x axis'),\
make_option('-y', '--input_path_y',\
help='path to distance matrix to be displayed in the y axis'),\
make_option('-m', '--model', type='choice',\
choices=FitModel.options, default='exponential',
help='model to be fitted to the data. Valid ' +\
'choices are:' + ', '.join(FitModel.options) + '. [default: %default]'),\
make_option('-o', '--output_path',
help='output path. directory for batch processing, '+\
'filename for single file operation'),
make_option('-X', '--x_label', default='Distance Dissimilarity (m)',\
help='Label for the x axis [default: %default]'),
make_option('-Y', '--y_label', default='Community Dissimilarity',\
help='Label for the y axis [default: %default]'),
make_option('-t', '--fig_title', default='Semivariogram',\
help='Title of the plot [default: %default]'),
make_option('--dot_color', help='dot color for plot, more info:' +\
' http://matplotlib.sourceforge.net/api/pyplot_api.html' +\
' [default: %default]', default="wo"),
make_option('--line_color', help='line color for plot, more info:' +\
' http://matplotlib.sourceforge.net/api/pyplot_api.html' +\
' [default: %default]', default="blue"),
make_option('--dot_alpha', type='float', help='alpha for dots, more info:' +\
' http://matplotlib.sourceforge.net/api/pyplot_api.html' +\
' [default: %default]', default=1),
make_option('--line_alpha', type='float', help='alpha for dots, more info:' +\
' http://matplotlib.sourceforge.net/api/pyplot_api.html' +\
' [default: %default]', default=1),
]
script_info['optional_options']=[\
make_option('-b', '--binning', type='string',\
default=None, help='binning ranges. Format: [increment,top_limit], when ' +\
'top_limit is -1=infinitum; you can specify several ranges using the same ' +\
'format, i.e. [2.5,10][50,-1] will set two bins, one from 0-10 using 2.5 ' +\
'size steps and from 10-inf using 50 size steps. Note that the binning is ' +\
'used to clean the plots (reduce number of points) but ignored to fit the ' +\
'model. [default: %default]'),
make_option('--ignore_missing_samples', help='This will overpass the error raised ' +\
'when the matrices have different sizes/samples', action='store_true', default=False),
make_option('--x_max', type='float', help='x axis max limit [default: auto]', default=None),
make_option('--x_min', type='float', help='x axis min limit [default: auto]', default=None),
make_option('--y_max', type='float', help='y axis max limit [default: auto]', default=None),
make_option('--y_min', type='float', help='y axis min limit [default: auto]', default=None),
]
script_info['version'] = __version__
def main():
option_parser, opts, args = parse_command_line_parameters(**script_info)
if opts.binning is None:
ranges = []
else:
# simple ranges format validation
if opts.binning.count('[')!=opts.binning.count(']') or\
opts.binning.count('[')!=opts.binning.count(','):
raise ValueError, "The binning input has an error: '%s'; " % +\
"\nthe format should be [increment1,top_limit1][increment2,top_limit2]"
# spliting in ranges
rgn_txt = opts.binning.split('][')
# removing left [ and right ]
rgn_txt[0] = rgn_txt[0][1:]
rgn_txt[-1] = rgn_txt[-1][:-1]
# converting into int
ranges = []
max = 0
for i,r in enumerate(rgn_txt):
try:
values = map(float,r.split(','))
except ValueError:
raise ValueError, "Not a valid format for binning %s" % opts.binning
if len(values)!=2:
raise ValueError, "All ranges must have only 2 values: [%s]" % r
elif i+1!=len(rgn_txt):
if values[0]>values[1]:
raise ValueError, "The bin value can't be greater than the max value: [%s]" % r
elif values<0:
raise ValueError, "This value can not be negative: [%s]" % r
elif max>values[1]:
raise ValueError, "This value can not smaller than the previous one: [%s]" % r
else:
max=values[1]
ranges.append(values)
x_samples, x_distmtx = parse_distmat(open(opts.input_path_x,'U'))
y_samples, y_distmtx = parse_distmat(open(opts.input_path_y,'U'))
if opts.ignore_missing_samples:
ignoring_from_x = list(set(x_samples)-set(y_samples))
ignoring_from_y = list(set(y_samples)-set(x_samples))
if opts.verbose:
print '\nFrom %s we are ignoring: %s\n' % (opts.input_path_x, ignoring_from_x)
print '\nFrom %s we are ignoring: %s\n' % (opts.input_path_y, ignoring_from_y)
print '\nOnly using: %s\n' % (list(set(x_samples) & set(y_samples)))
x_file = StringIO(\
filter_samples_from_distance_matrix((x_samples, x_distmtx), ignoring_from_x))
x_samples, x_distmtx = parse_distmat(x_file)
y_file = StringIO(\
filter_samples_from_distance_matrix((y_samples, y_distmtx), ignoring_from_y))
y_samples, y_distmtx = parse_distmat(y_file)
else:
if x_distmtx.shape!=y_distmtx.shape:
raise ValueError, 'The distance matrices have different sizes. ' +\
'You can cancel this error by passing --ignore_missing_samples'
(x_val,y_val,x_fit,y_fit) =\
fit_semivariogram((x_samples,x_distmtx), (y_samples,y_distmtx), opts.model, ranges)
plot(x_val, y_val, opts.dot_color, alpha=opts.dot_alpha)
plot(x_fit, y_fit, linewidth=2.0, color=opts.line_color, alpha=opts.line_alpha)
if opts.x_min!=None and opts.x_max!=None:
xlim([opts.x_min,opts.x_max])
if opts.y_min!=None and opts.y_max!=None:
ylim([opts.y_min,opts.y_max])
x_label = opts.x_label
y_label = opts.y_label
fig_title = '%s (%s)' % (opts.fig_title, opts.model)
xlabel(x_label)
ylabel(y_label)
title(fig_title)
savefig(opts.output_path)
if __name__ == "__main__":
main()
|