File: test_transform_coordinate_matrices.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (249 lines) | stat: -rwxr-xr-x 12,597 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#!/usr/bin/env python
# File created on 21 Feb 2010
from __future__ import division

__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Release"
 
from numpy import array
from cogent.util.unit_test import TestCase, main
from qiime.parse import parse_coords
from qiime.transform_coordinate_matrices import map_sample_ids, reorder_coords,\
    filter_coords_matrix, pad_coords_matrix, pad_coords_matrices,\
    get_procrustes_results, procrustes_monte_carlo


class ProcrustesTests(TestCase):
    """ Tests of the Procrustes wrapper code """
    
    def setUp(self):
        """ """
        self.pcoa1_f = pcoa1_f.split('\n')
        self.sample_ids1, self.coords1, self.eigvals1, self.pct_var1 =\
          parse_coords(self.pcoa1_f)
        self.pcoa2_f = pcoa2_f.split('\n')
        self.sample_ids2, self.coords2, self.eigvals2, self.pct_var2 =\
          parse_coords(self.pcoa2_f)
        self.pcoa3_f = pcoa3_f.split('\n')
        self.sample_ids3, self.coords3, self.eigvals3, self.pct_var3 =\
          parse_coords(self.pcoa3_f)
        self.pcoa4_f = pcoa4_f.split('\n')
        self.sample_ids4, self.coords4, self.eigvals4, self.pct_var4 =\
          parse_coords(self.pcoa3_f)
          
        self.sample_id_map1 = sample_id_map1
        
    def test_map_sample_ids(self):
        """Mapping and reordering of sample IDs functions as expected
        """
        expected = ['s4','s1','s2','s3']
        actual = map_sample_ids(self.sample_ids1,self.sample_id_map1)
        self.assertEqual(actual,expected)
        expected = ['s1','s2','s3','s4']
        actual = map_sample_ids(self.sample_ids2,self.sample_id_map1)
        self.assertEqual(actual,expected)
        expected = ['s1','s2','s3','s4']
        actual = map_sample_ids(self.sample_ids3,self.sample_id_map1)
        self.assertEqual(actual,expected)
        # bad sample id raises KeyError
        self.assertRaises(KeyError,map_sample_ids,['abcd','aaa','ccc'],\
         self.sample_id_map1)
    
    def test_reorder_coords(self):
        """Reordering of columns functions as expected """
        m = [[1,2,3],[4,5,6],[7,8,9]]
        in_sids = ['A','B','C']
        order = ['A','B','C']
        expected = m
        self.assertEqual(reorder_coords(m,in_sids,order),expected)
        
        in_sids = ['C','B','A']
        expected = [[7,8,9],[4,5,6],[1,2,3]]
        self.assertEqual(reorder_coords(m,in_sids,order),expected)
        
        in_sids = ['C','B','A']
        expected = [[7,8,9],[4,5,6],[1,2,3]]
        self.assertEqual(reorder_coords(m,in_sids,order),expected)
        
        # order leaves some samples out 
        m = [[1,2,3],[4,5,6],[7,8,9]]
        in_sids = ['A','B','C']
        order = ['A','B']
        expected = [[1,2,3],[4,5,6]]
        self.assertEqual(reorder_coords(m,in_sids,order),expected)
        
    def test_reorder_coords_errors(self):
        """Reordering of columns handles errors """
        
        m = [[1,2,3],[4,5,6],[7,8,9]]
        in_sids = ['A','B','C']
        order = ['A','B','C','D']
        self.assertRaises(ValueError,reorder_coords,m,in_sids,order)
        
    def test_filter_coords_matrix(self):
        """ Removing coordinate from each sample functions as expected
        """
        m = array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
        expected = array([[1],[4],[7],[10]])
        self.assertEqual(filter_coords_matrix(m,1),expected)
        expected = array([[1,2],[4,5],[7,8],[10,11]])
        self.assertEqual(filter_coords_matrix(m,2),expected)
        
    def test_pad_coords_matrix(self):
        """ padding a coordinates matrix with zeros functions as expected
        """
        m = array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
        actual = pad_coords_matrix(m,0)
        self.assertEqual(actual,m)
        
        m = array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
        expected = array([[1,2,3,0.,0.],[4,5,6,0.,0.],\
                          [7,8,9,0.,0.],[10,11,12,0.,0.]])
        actual = pad_coords_matrix(m,2)
        self.assertEqual(actual,expected)
        
        m = array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
        self.assertRaises(ValueError,pad_coords_matrix,m,-3)
        
    def test_pad_coords_matrices(self):
        """ padding the correct choice of a coords matrix with zeros functions
        """
        # first is longer
        m1 = array([[1,0,3,9,4],[4,5,6,94,5],[7,8,9,6,6],[10,11,12,4,6]])
        m2 = array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
        m2_expected = array([[1,2,3,0.,0.],[4,5,6,0.,0.],\
                             [7,8,9,0.,0.],[10,11,12,0.,0.]])
        actual = pad_coords_matrices(m1,m2)
        expected = (m1,m2_expected)
        self.assertEqual(actual,expected)
        
        # second is longer
        m1 = array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
        m2 = array([[1,2,3,9],[4,5,6,94],[7,8,9,6],[10,11,12,4]])
        m1_expected = array([[1,2,3,0.],[4,5,6,0.],[7,8,9,0.],[10,11,12,0.]])
        actual = pad_coords_matrices(m1,m2)
        expected = (m1_expected,m2)
        self.assertEqual(actual,expected)
        
        # equal length, so no change
        m1 = array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
        m2 = array([[0,2,3],[4,5,6],[7,8,9],[10,11,12]])
        actual = pad_coords_matrices(m1,m2)
        expected = (m1,m2)
        self.assertEqual(actual,expected)
        
    def test_get_procrustes_results(self):
        sample_id_map = {'CP3A1':'S1','CC1A1':'S2','CC2A1':'S3','CP1A1':'S4'}
        actual = get_procrustes_results(self.pcoa1_f,self.pcoa1_f,\
         sample_id_map=sample_id_map,randomize=None,max_dimensions=None)
        # just some sanity checks as the individual componenets are 
        # already tested -- these are based on looking at the output of the
        # run, and testing to ensure that it hasn't changed
        self.assertEqual(set(actual[0].split('\n')),set('pc vector number\t1\t2\t3\t4\t5\t6\nS1\t0.116737988534\t0.414627960015\t0.201315243115\t0.113769076804\t-0.283025353088\t-0.144278863311\nS2\t-0.238263544222\t-0.37724227779\t-0.169458651217\t0.0305157004776\t0.112181007345\t0.0677415967093\nS3\t-0.199225958574\t-0.250846540029\t-0.119813087305\t-0.155652031006\t0.18495315824\t-0.160875399364\nS4\t0.320751514262\t0.213460857804\t0.0879564954067\t0.0113672537238\t-0.0141088124974\t0.237412665966\n\n\neigvals\t8976580.24393\t6044862.67619\t4372581.39431\t3161360.10319\t2583594.45275\t2407555.39787\n% variation explained\t23.1764657118\t15.6071186064\t11.2894866423\t8.16225689998\t6.67053450426\t6.21602253997'.split('\n')))
        self.assertEqual(set(actual[1].split('\n')),set('pc vector number\t1\t2\t3\t4\t5\t6\nS1\t0.116737988534\t0.414627960015\t0.201315243115\t0.113769076804\t-0.283025353088\t-0.144278863311\nS2\t-0.238263544222\t-0.37724227779\t-0.169458651217\t0.0305157004776\t0.112181007345\t0.0677415967093\nS3\t-0.199225958574\t-0.250846540029\t-0.119813087305\t-0.155652031006\t0.18495315824\t-0.160875399364\nS4\t0.320751514262\t0.213460857804\t0.0879564954067\t0.0113672537238\t-0.0141088124974\t0.237412665966\n\n\neigvals\t8976580.24393\t6044862.67619\t4372581.39431\t3161360.10319\t2583594.45275\t2407555.39787\n% variation explained\t23.1764657118\t15.6071186064\t11.2894866423\t8.16225689998\t6.67053450426\t6.21602253997'.split('\n')))
        self.assertTrue(actual[2] < 6e-30)
        
    def test_get_procrustes_results_imprefect_sample_overlap(self):
        sample_id_map = {'aaa':'S0','bbb':'S1','ccc':'S2','ddd':'S3','eee':'S4'}
        actual = get_procrustes_results(self.pcoa3_f,self.pcoa4_f,\
         sample_id_map=sample_id_map,randomize=None,max_dimensions=None)
        # Confirm that only the sample ids that are in both procrustes results 
        # show up in the output
        for a in actual[:2]:
            self.assertTrue('S1' in a)
            self.assertTrue('S2' in a)
            self.assertTrue('S3' in a)
            self.assertTrue('S0' not in a)
            self.assertTrue('S4' not in a)
        
        
    def test_procrustes_monte_carlo(self):
        """ sanity test of procrustes_monte_carlo wrapper function
          
          THIS TEST MAY RANDOMLY FAIL BECAUSE IT IS BASED ON TESTING 
          RANDOM PERMUTATIONS, BUT THAT SHOULD BE RARE.
        
        """
        def shuffle_f(coords):
            """ A fake shuffle function -- used to avoid random test failures 
                
                returns a re-ordered coords2
            """
            return array(\
             [[-0.16713783,  0.22321481,  0.33766418,  0.22785083, -0.23830446, -0.18754852],
              [-0.14864343,  0.07290181, -0.06250315,  0.03201972, -0.0966749 , -0.10337987],
              [ 0.35725269, -0.00761567,  0.09044279, -0.21006839, -0.01355589, -0.04590791],
              [ 0.26535811,  0.09772598,  0.04339214, -0.21014987,  0.14089095, -0.10261849]])
              
        actual = procrustes_monte_carlo(self.pcoa1_f,
                                        self.pcoa2_f,
                                        trials=100,
                                        shuffle_f=shuffle_f)
        # just some sanity checks as the individual componenets are 
        # already tested -- these are based on looking at the output of the
        # run, and testing to ensure that it hasn't changed 
        expected_actual_m2 = 0.0211
        expected_len_trial_m2 = 100
        expected_count_better = 0
        expected_p_value = 0.0
        self.assertAlmostEqual(actual[0],expected_actual_m2,3)
        self.assertEqual(len(actual[1]),expected_len_trial_m2)
        self.assertEqual(actual[2], expected_count_better)
        self.assertEqual(actual[3], expected_p_value)


pcoa1_f = """pc vector number	1	2	3	4	5	6
CP3A1	-322.585729836	938.204618621	-28.2137779927	490.569459399	-1046.48732174	-234.500487421
CC1A1	-1190.7802044	-998.399315723	-934.981261143	286.964362952	-79.9676118756	284.018377708
CC2A1	-1095.30958517	-689.284928862	-813.567688811	-168.328870745	98.0045191705	-275.089103439
CP1A1	176.351315302	446.228808693	-305.44480187	240.13477453	-388.822965062	698.967247585


eigvals	8976580.24393	6044862.67619	4372581.39431	3161360.10319	2583594.45275	2407555.39787
% variation explained	23.1764657118	15.6071186064	11.2894866423	8.16225689998	6.67053450426	6.21602253997
"""

pcoa2_f = """pc vector number	1	2	3	4	5	6
CC1A1	0.265358109258	0.0977259770776	0.0433921359913	-0.210149870291	0.140890951431	-0.102618488686
CC2A1	0.357252689812	-0.00761566770389	0.0904427944586	-0.210068386229	-0.0135558941951	-0.0459079084294
CP1A1	-0.148643434206	0.072901814482	-0.0625031523656	0.0320197230867	-0.0966748975994	-0.103379871571	
CP3A1	-0.167137833152	0.223214806728	0.337664175642	0.227850833252	-0.238304458085	-0.187548520834	


eigvals	1.03654365499	0.486727634877	0.436010533243	0.344489629748	0.325443839964	0.301867574234
% variation explained	17.9408597738	8.42445197874	7.54662266189	5.96254688461	5.63289569997	5.2248294546
"""

pcoa3_f = """pc vector number	1	2	3	4	5	6
aaa	0.265358109258	0.0977259770776	0.0433921359913	-0.210149870291	0.140890951431	-0.102618488686
bbb	0.357252689812	-0.00761566770389	0.0904427944586	-0.210068386229	-0.0135558941951	-0.0459079084294
ccc	-0.148643434206	0.072901814482	-0.0625031523656	0.0320197230867	-0.0966748975994	-0.103379871571	
ddd	-0.167137833152	0.223214806728	0.337664175642	0.227850833252	-0.238304458085	-0.187548520834	


eigvals	1.03654365499	0.486727634877	0.436010533243	0.344489629748	0.325443839964	0.301867574234
% variation explained	17.9408597738	8.42445197874	7.54662266189	5.96254688461	5.63289569997	5.2248294546
"""

pcoa4_f = """pc vector number	1	2	3	4	5	6
eee	0.265358109258	0.0977259770776	0.0433921359913	-0.210149870291	0.140890951431	-0.102618488686
bbb	0.357252689812	-0.00761566770389	0.0904427944586	-0.210068386229	-0.0135558941951	-0.0459079084294
ccc	-0.148643434206	0.072901814482	-0.0625031523656	0.0320197230867	-0.0966748975994	-0.103379871571	
ddd	-0.167137833152	0.223214806728	0.337664175642	0.227850833252	-0.238304458085	-0.187548520834	


eigvals	1.03654365499	0.486727634877	0.436010533243	0.344489629748	0.325443839964	0.301867574234
% variation explained	17.9408597738	8.42445197874	7.54662266189	5.96254688461	5.63289569997	5.2248294546
"""

sample_id_map1 = {'CC1A1':'s1','CC2A1':'s2','CP1A1':'s3','CP3A1':'s4',\
'aaa':'s1','bbb':'s2','ccc':'s3','ddd':'s4'}

if __name__ == "__main__":
    main()