File: plot_semivariogram.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (168 lines) | stat: -rwxr-xr-x 8,088 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env python
# File created on 09 Feb 2010
from __future__ import division

__author__ = "Antonio Gonzalez Pena"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Antonio Gonzalez Pena"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Antonio Gonzalez Pena"
__email__ = "antgonza@gmail.com"
__status__ = "Release"
 
from qiime.util import parse_command_line_parameters, get_options_lookup
from optparse import make_option
from qiime.plot_semivariogram import fit_semivariogram, FitModel
from qiime.parse import parse_distmat
from qiime.filter import filter_samples_from_distance_matrix
from pylab import plot, xlabel, ylabel, title, savefig, ylim, xlim
from numpy import asarray
import os
from StringIO import StringIO

options_lookup = get_options_lookup()

script_info={}

script_info['brief_description']="""Fits a model between two distance matrices and plots the result"""
script_info['script_description']="""Fits a spatial autocorrelation model between two matrices and plots the result. This script will work with two distance matrices but will ignore the 0s at the diagonal and the values that go to N/A"""
script_info['script_usage']=[]
script_info['script_usage'].append(("""Fitting""","""For this script, the user supplies two distance matrices (i.e. resulting file from beta_diversity.py), along with the output filename (e.g. semivariogram), and the model to fit, as follows:""","""plot_semivariogram.py -x distance.txt -y unifrac.txt -m exponential -o semivariogram.png"""))
script_info['output_description']="""The resulting output file consists of a pdf image containing the plot between the two distances matrices and the fitted model"""

script_info['required_options']=[\
 make_option('-x', '--input_path_x',\
     help='path to distance matrix to be displayed in the x axis'),\
 make_option('-y', '--input_path_y',\
     help='path to distance matrix to be displayed in the y axis'),\
 make_option('-m', '--model', type='choice',\
     choices=FitModel.options, default='exponential', 
     help='model to be fitted to the data. Valid ' +\
     'choices are:' + ', '.join(FitModel.options) + '. [default: %default]'),\
 make_option('-o', '--output_path',
     help='output path. directory for batch processing, '+\
       'filename for single file operation'),
 make_option('-X', '--x_label', default='Distance Dissimilarity (m)',\
     help='Label for the x axis [default: %default]'),
 make_option('-Y', '--y_label', default='Community Dissimilarity',\
     help='Label for the y axis [default: %default]'),
 make_option('-t', '--fig_title', default='Semivariogram',\
     help='Title of the plot [default: %default]'),       
 make_option('--dot_color', help='dot color for plot, more info:' +\
    ' http://matplotlib.sourceforge.net/api/pyplot_api.html' +\
    ' [default: %default]', default="wo"), 
 make_option('--line_color', help='line color for plot, more info:' +\
    ' http://matplotlib.sourceforge.net/api/pyplot_api.html' +\
    ' [default: %default]', default="blue"), 
 make_option('--dot_alpha', type='float', help='alpha for dots, more info:' +\
    ' http://matplotlib.sourceforge.net/api/pyplot_api.html' +\
    ' [default: %default]', default=1),
 make_option('--line_alpha', type='float', help='alpha for dots, more info:' +\
    ' http://matplotlib.sourceforge.net/api/pyplot_api.html' +\
    ' [default: %default]', default=1),
]
script_info['optional_options']=[\
 make_option('-b', '--binning', type='string',\
     default=None, help='binning ranges. Format: [increment,top_limit], when ' +\
     'top_limit is -1=infinitum; you can specify several ranges using the same ' +\
     'format, i.e. [2.5,10][50,-1] will set two bins, one from 0-10 using 2.5 ' +\
     'size steps and from 10-inf using 50 size steps. Note that the binning is ' +\
     'used to clean the plots (reduce number of points) but ignored to fit the ' +\
     'model. [default: %default]'),
 make_option('--ignore_missing_samples', help='This will overpass the error raised ' +\
     'when the matrices have different sizes/samples', action='store_true', default=False),         
 make_option('--x_max', type='float', help='x axis max limit [default: auto]', default=None),         
 make_option('--x_min', type='float', help='x axis min limit [default: auto]', default=None),         
 make_option('--y_max', type='float', help='y axis max limit [default: auto]', default=None),         
 make_option('--y_min', type='float', help='y axis min limit [default: auto]', default=None),
]

script_info['version'] = __version__

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    
    if opts.binning is None:
        ranges = []
    else:
        # simple ranges format validation
        if opts.binning.count('[')!=opts.binning.count(']') or\
          opts.binning.count('[')!=opts.binning.count(','):
            raise ValueError, "The binning input has an error: '%s'; " % +\
             "\nthe format should be [increment1,top_limit1][increment2,top_limit2]" 
        # spliting in ranges
        rgn_txt = opts.binning.split('][')
        # removing left [ and right ]
        rgn_txt[0] = rgn_txt[0][1:]
        rgn_txt[-1] = rgn_txt[-1][:-1]
        # converting into int
        ranges = []
        max = 0
        
        for i,r in enumerate(rgn_txt):
            try:
                values = map(float,r.split(','))
            except ValueError:
                raise ValueError, "Not a valid format for binning %s" % opts.binning 
            if len(values)!=2:
                raise ValueError, "All ranges must have only 2 values: [%s]" % r
            elif i+1!=len(rgn_txt): 
                if values[0]>values[1]:
                    raise ValueError, "The bin value can't be greater than the max value: [%s]" % r
                elif values<0:
                    raise ValueError, "This value can not be negative: [%s]" % r
                elif max>values[1]:
                    raise ValueError, "This value can not smaller than the previous one: [%s]" % r
                else:
                    max=values[1]
            
            ranges.append(values)
    
    x_samples, x_distmtx = parse_distmat(open(opts.input_path_x,'U'))
    y_samples, y_distmtx = parse_distmat(open(opts.input_path_y,'U'))
    
    if opts.ignore_missing_samples:
        ignoring_from_x = list(set(x_samples)-set(y_samples))
        ignoring_from_y = list(set(y_samples)-set(x_samples))
        
        if opts.verbose:
            print '\nFrom %s we are ignoring: %s\n' % (opts.input_path_x, ignoring_from_x)
            print '\nFrom %s we are ignoring: %s\n' % (opts.input_path_y, ignoring_from_y)
            print '\nOnly using: %s\n' % (list(set(x_samples) & set(y_samples)))
        
        x_file = StringIO(\
            filter_samples_from_distance_matrix((x_samples, x_distmtx), ignoring_from_x))
        x_samples, x_distmtx = parse_distmat(x_file)
        
        y_file = StringIO(\
            filter_samples_from_distance_matrix((y_samples, y_distmtx), ignoring_from_y))
        y_samples, y_distmtx = parse_distmat(y_file)
    else:
        if x_distmtx.shape!=y_distmtx.shape:
            raise ValueError, 'The distance matrices have different sizes. ' +\
                'You can cancel this error by passing --ignore_missing_samples'
        
    (x_val,y_val,x_fit,y_fit) =\
          fit_semivariogram((x_samples,x_distmtx), (y_samples,y_distmtx), opts.model, ranges)
    
    plot(x_val, y_val, opts.dot_color, alpha=opts.dot_alpha)
    plot(x_fit, y_fit, linewidth=2.0, color=opts.line_color, alpha=opts.line_alpha)
    
    if opts.x_min!=None and opts.x_max!=None:
        xlim([opts.x_min,opts.x_max])
    if opts.y_min!=None and opts.y_max!=None:
        ylim([opts.y_min,opts.y_max])
        
    x_label = opts.x_label
    y_label = opts.y_label
    fig_title = '%s (%s)' % (opts.fig_title, opts.model)
    
    xlabel(x_label)
    ylabel(y_label)
    title(fig_title)
    
    savefig(opts.output_path)

if __name__ == "__main__":
    main()