File: test_uc_processor.py

package info (click to toggle)
python-biom-format 2.1.5%2Bdfsg-7
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 51,844 kB
  • ctags: 1,341
  • sloc: python: 11,620; makefile: 128; sh: 65
file content (109 lines) | stat: -rw-r--r-- 4,270 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python

# -----------------------------------------------------------------------------
# Copyright (c) 2011-2015, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------

import tempfile
from unittest import TestCase, main

import numpy as np

import biom
from biom.cli.uc_processor import _from_uc

class TestUcProcessor(TestCase):
    """Tests for ``_from_uc`` using the uc / rep-set fixtures in this file."""

    def setUp(self):
        """Expose the function under test and line-split every fixture."""
        self.cmd = _from_uc
        # The fixtures are module-level strings; the tests pass them to the
        # function under test as lists of lines.
        self.rep_set = rep_set.split('\n')
        self.rep_set_no_mapping = rep_set_no_mapping.split('\n')
        self.rep_set_missing_id = rep_set_missing_id.split('\n')
        self.uc_minimal = uc_minimal.split('\n')
        self.uc = uc.split('\n')

    @staticmethod
    def _table(counts, otu_ids, samples):
        """Build the ``biom.Table`` a test expects from nested count lists."""
        return biom.Table(np.array(counts),
                          observation_ids=otu_ids,
                          sample_ids=samples)

    def test_basic(self):
        """The single-record uc input gives a 1x1 table named by that id."""
        result = self.cmd(self.uc_minimal)
        wanted = self._table([[1.0]], ['f2_1539'], ['f2'])
        self.assertEqual(result, wanted)

    def test_basic_w_mapping(self):
        """With ``rep_set`` supplied, the observation id becomes ``otu1``."""
        result = self.cmd(self.uc_minimal, self.rep_set)
        wanted = self._table([[1.0]], ['otu1'], ['f2'])
        self.assertEqual(result, wanted)

    def test_rep_set_no_mapping(self):
        """A rep set whose headers hold only OTU ids raises ValueError."""
        with self.assertRaises(ValueError):
            self.cmd(self.uc_minimal, self.rep_set_no_mapping)

    def test_rep_set_missing_id(self):
        """A rep set naming ids absent from the uc input raises ValueError."""
        with self.assertRaises(ValueError):
            self.cmd(self.uc_minimal, self.rep_set_missing_id)

    def test_uc(self):
        """The three-record uc input gives a 2x2 table over f2 and f3."""
        result = self.cmd(self.uc)
        wanted = self._table([[1.0, 1.0], [0.0, 1.0]],
                             ['f2_1539', 'f3_1540'],
                             ['f2', 'f3'])
        self.assertEqual(result, wanted)

    def test_uc_w_mapping(self):
        """With ``rep_set`` supplied, the rows are named otu1 and otu2."""
        result = self.cmd(self.uc, self.rep_set)
        wanted = self._table([[1.0, 1.0], [0.0, 1.0]],
                             ['otu1', 'otu2'],
                             ['f2', 'f3'])
        self.assertEqual(result, wanted)

# Smallest uc fixture: the eight-line uclust comment header followed by one
# tab-separated "S" record for f2_1539. Every line ends with a newline.
uc_minimal = "".join(row + "\n" for row in (
    "# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc",
    "# version=1.2.22",
    "# Tab-separated fields:",
    "# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel",
    "# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit",
    "# For C and D types, PctId is average id with seed.",
    "# QueryStart and SeedStart are zero-based relative to start of sequence.",
    "# If minus strand, SeedStart is relative to reverse-complemented seed.",
    "S\t0\t133\t*\t*\t*\t*\t*\tf2_1539\t*",
))

# Larger uc fixture: the same uclust comment header, then two "S" records
# (f2_1539, f3_1540) and one "H" record whose query f3_42 targets f2_1539.
# Record fields are tab-separated and every line ends with a newline.
uc = "".join(row + "\n" for row in (
    "# uclust --input /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T/UclustExactMatchFilterrW47Ju.fasta --id 0.97 --tmpdir /var/folders/xq/0kh93ng53bs6zzk091w_bbsr0000gn/T --w 8 --stepwords 8 --usersort --maxaccepts 1 --stable_sort --maxrejects 8 --uc dn-otus/uclust_picked_otus/seqs_clusters.uc",
    "# version=1.2.22",
    "# Tab-separated fields:",
    "# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=QueryLabel, 10=TargetLabel",
    "# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NoHit",
    "# For C and D types, PctId is average id with seed.",
    "# QueryStart and SeedStart are zero-based relative to start of sequence.",
    "# If minus strand, SeedStart is relative to reverse-complemented seed.",
    "S\t0\t133\t*\t*\t*\t*\t*\tf2_1539\t*",
    "S\t0\t133\t*\t*\t*\t*\t*\tf3_1540\t*",
    "H\t0\t141\t100.0\t+\t0\t0\t133M8D\tf3_42\tf2_1539",
))

# Rep-set fixture: each ">" header pairs an OTU id (otu1, otu2) with one of
# the ids found in the uc fixtures (f2_1539, f3_1540).
rep_set = "\n".join([
    ">otu1 f2_1539",
    "ACGT",
    ">otu2 f3_1540",
    "ACCT",
    "",  # keeps the trailing newline
])

# Rep-set fixture whose ">" headers carry only the OTU id, with no second
# field; test_rep_set_no_mapping expects ValueError for it.
rep_set_no_mapping = "\n".join([
    ">otu1",
    "ACGT",
    ">otu2",
    "ACCT",
    "",  # keeps the trailing newline
])

# Rep-set fixture whose headers both name f99_1539, an id that appears in
# neither uc fixture; test_rep_set_missing_id expects ValueError for it.
rep_set_missing_id = "\n".join([
    ">otu1 f99_1539",
    "ACGT",
    ">otu2 f99_1539",
    "ACCT",
    "",  # keeps the trailing newline
])

# When this file is executed as a script, hand control to unittest's
# command-line entry point (``main`` is imported from ``unittest`` above).
if __name__ == '__main__':
    main()