1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
# -----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------
import os
import unittest
import numpy.testing as npt
from biom.cli.table_subsetter import _subset_table
from biom.parse import parse_biom_table
from biom.util import HAVE_H5PY
class TestSubsetTable(unittest.TestCase):
    """Tests for ``_subset_table`` on JSON-string and HDF5 BIOM inputs."""

    def setUp(self):
        """Set up data for use in unit tests."""
        self.biom_str1 = biom1

    @staticmethod
    def _test_data_path(filename):
        """Return the absolute path of ``filename`` under ``test_data``.

        The path is resolved relative to this test module rather than the
        current working directory, so the tests neither depend on nor
        modify the process-wide cwd, and work with any path separator.
        """
        here = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(here, 'test_data', filename)

    def test_subset_samples(self):
        """Correctly subsets samples in a table."""
        obs = _subset_table(json_table_str=self.biom_str1, axis='sample',
                            ids=['f4', 'f2'], hdf5_biom=None)
        # The first element of the result is an iterable of JSON chunks.
        obs = parse_biom_table(list(obs[0]))

        self.assertEqual(len(obs.ids()), 2)
        self.assertEqual(len(obs.ids(axis='observation')), 14)
        self.assertIn('f4', obs.ids())
        self.assertIn('f2', obs.ids())

    def test_subset_observations(self):
        """Correctly subsets observations in a table."""
        obs = _subset_table(json_table_str=self.biom_str1,
                            axis='observation',
                            ids=['None2', '879972'], hdf5_biom=None)
        obs = parse_biom_table(list(obs[0]))

        self.assertEqual(len(obs.ids()), 9)
        self.assertEqual(len(obs.ids(axis='observation')), 2)
        self.assertIn('None2', obs.ids(axis='observation'))
        self.assertIn('879972', obs.ids(axis='observation'))

    def test_invalid_input(self):
        """Raises ValueError upon invalid input."""
        # Axis that is neither 'sample' nor 'observation'.
        with self.assertRaises(ValueError):
            _subset_table(hdf5_biom=None, json_table_str=self.biom_str1,
                          axis='foo', ids=['f2', 'f4'])

        # Neither a JSON string nor an HDF5 path supplied.
        with self.assertRaises(ValueError):
            _subset_table(hdf5_biom=None, json_table_str=None,
                          axis='sample', ids=['f2', 'f4'])

        # Both a JSON string and an HDF5 path supplied.
        with self.assertRaises(ValueError):
            _subset_table(json_table_str=self.biom_str1, hdf5_biom='foo',
                          axis='sample', ids=['f2', 'f4'])

    @unittest.skipIf(not HAVE_H5PY, 'H5PY is not installed')
    def test_subset_samples_hdf5(self):
        """Correctly subsets samples in a hdf5 table"""
        obs = _subset_table(hdf5_biom=self._test_data_path('test.biom'),
                            axis='sample',
                            ids=[u'Sample1', u'Sample2', u'Sample3'],
                            json_table_str=None)
        obs = obs[0]

        self.assertEqual(len(obs.ids()), 3)
        self.assertEqual(len(obs.ids(axis='observation')), 5)
        self.assertIn(u'Sample1', obs.ids())
        self.assertIn(u'Sample2', obs.ids())
        self.assertIn(u'Sample3', obs.ids())

    @unittest.skipIf(not HAVE_H5PY, 'H5PY is not installed')
    def test_subset_observations_hdf5(self):
        """Correctly subsets observations in a hdf5 table"""
        obs = _subset_table(hdf5_biom=self._test_data_path('test.biom'),
                            axis='observation',
                            ids=[u'GG_OTU_1', u'GG_OTU_3', u'GG_OTU_5'],
                            json_table_str=None)
        obs = obs[0]

        self.assertEqual(len(obs.ids()), 4)
        self.assertEqual(len(obs.ids(axis='observation')), 3)
        self.assertIn(u'GG_OTU_1', obs.ids(axis='observation'))
        self.assertIn(u'GG_OTU_3', obs.ids(axis='observation'))
        self.assertIn(u'GG_OTU_5', obs.ids(axis='observation'))
# JSON fixture used by the JSON-based tests above: a sparse BIOM 1.0.0
# "OTU table" whose declared shape is [14, 9] -- 14 rows (observations, each
# with taxonomy metadata) by 9 columns (samples, with null metadata).
# The literal is kept as adjacent string fragments so no line needs escaping.
biom1 = ('{"id": "None","format": "Biological Observation Matrix 1.0.0",'
'"format_url": "http://biom-format.org","type": "OTU table",'
'"generated_by": "QIIME 1.6.0-dev","date": '
'"2013-02-09T09:30:11.550590","matrix_type": "sparse",'
'"matrix_element_type": "int","shape": [14, 9],"data": '
'[[0,0,20],[0,1,18],[0,2,18],[0,3,22],[0,4,4],[1,4,1],[2,0,1],[2,4,1]'
',[2,5,1],[3,6,1],[4,4,1],[5,7,20],[6,4,1],[7,4,1],[7,5,1],[8,4,1],'
'[8,6,2],[8,8,3],[9,7,2],[10,5,1],[11,4,9],[11,5,20],[11,6,1],'
'[11,8,4],[12,4,3],[12,6,19],[12,8,15],[13,0,1],[13,1,4],[13,2,4]],'
'"rows": [{"id": "295053", "metadata": {"taxonomy": ["k__Bacteria"]}}'
',{"id": "42684", "metadata": {"taxonomy": ["k__Bacteria", '
'"p__Proteobacteria"]}},{"id": "None11", "metadata": {"taxonomy": '
'["Unclassified"]}},{"id": "None10", "metadata": {"taxonomy": '
'["Unclassified"]}},{"id": "None7", "metadata": {"taxonomy": '
'["Unclassified"]}},{"id": "None6", "metadata": {"taxonomy": '
'["Unclassified"]}},{"id": "None5", "metadata": {"taxonomy": '
'["k__Bacteria"]}},{"id": "None4", "metadata": {"taxonomy": '
'["Unclassified"]}},{"id": "None3", "metadata": {"taxonomy": '
'["k__Bacteria"]}},{"id": "None2", "metadata": {"taxonomy": '
'["k__Bacteria"]}},{"id": "None1", "metadata": {"taxonomy": '
'["Unclassified"]}},{"id": "879972", "metadata": {"taxonomy": '
'["k__Bacteria"]}},{"id": "None9", "metadata": {"taxonomy": '
'["Unclassified"]}},{"id": "None8", "metadata": {"taxonomy": '
'["k__Bacteria"]}}],"columns": [{"id": "f2", "metadata": null},'
'{"id": "f1", "metadata": null},{"id": "f3", "metadata": null},'
'{"id": "f4", "metadata": null},{"id": "p2", "metadata": null},{"id":'
' "p1", "metadata": null},{"id": "t1", "metadata": null},{"id": '
'"not16S.1", "metadata": null},{"id": "t2", "metadata": null}]}')
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|