File: test_insertion.py

package info (click to toggle)
q2-fragment-insertion 2024.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 776 kB
  • sloc: python: 2,004; makefile: 32; sh: 13
file content (142 lines) | stat: -rw-r--r-- 5,614 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import os.path
import shutil
import unittest

import biom
import skbio
import pandas as pd
from pandas.testing import assert_frame_equal

from qiime2.sdk import Artifact
from qiime2.plugin.testing import TestPluginBase

from q2_types.feature_data import DNAIterator
import pytest

@pytest.mark.skip(
    reason="sepp is not in testing any more since pplacer is lost "
           "due to ocaml bindings missing in latest mcl")
class TestSepp(TestPluginBase):
    """Smoke test for the ``sepp`` plugin action.

    The whole class is skipped unconditionally; the reason is given in the
    ``skip`` marker above.
    """
    package = 'q2_fragment_insertion.tests'

    def _cp_fp(self, frm, to):
        """Copy the test-data file ``frm`` into the temp dir as ``to``."""
        shutil.copy(self.get_data_path(frm),
                    os.path.join(self.temp_dir.name, to))

    def setUp(self):
        """Import the query sequences and build a reference database."""
        super().setUp()
        self.action = self.plugin.actions['sepp']

        input_sequences_fp = self.get_data_path('seqs-to-query.fasta')
        self.input_sequences = Artifact.import_data('FeatureData[Sequence]',
                                                    input_sequences_fp)

        # The reference database is imported from a directory, so the three
        # reference files are staged in the temp dir under new names
        # (presumably the names the SeppReferenceDatabase format expects).
        self._cp_fp('ref-tree.nwk', 'tree.nwk')
        self._cp_fp('ref-seqs-aligned.fasta', 'aligned-dna-sequences.fasta')
        self._cp_fp('ref-raxml-info.txt', 'raxml-info.txt')

        self.reference_db = Artifact.import_data('SeppReferenceDatabase',
                                                 self.temp_dir.name)

    def test_exercise_sepp(self):
        # Run the action and check that every query sequence shows up as a
        # tip of the resulting tree and that the placements have the
        # expected top-level keys.
        obs_tree_artifact, obs_placements_artifact = self.action(
            self.input_sequences, self.reference_db,
            alignment_subset_size=1000, placement_subset_size=5000)

        tree = obs_tree_artifact.view(skbio.TreeNode)
        obs_tree = {n.name for n in tree.tips()}
        seqs = {r.metadata['id'] for r
                in self.input_sequences.view(DNAIterator)}

        # Equivalent to asserting ``seqs <= obs_tree``, but a failure names
        # the missing IDs instead of reporting "False is not true".
        missing = seqs - obs_tree
        self.assertFalse(
            missing,
            'query sequences missing from tree tips: {}'.format(
                sorted(missing)))

        obs_placements = obs_placements_artifact.view(dict)
        self.assertEqual(set(obs_placements),
                         {'tree', 'placements', 'metadata', 'version',
                          'fields'})


class TestClassify(TestPluginBase):
    """Tests for the ``classify_otus_experimental`` plugin action."""
    package = 'q2_fragment_insertion.tests'

    def setUp(self):
        # Shared fixtures: the action under test plus the query sequences,
        # an insertion tree and a taxonomy imported from the test data.
        super().setUp()
        self.action = self.plugin.actions['classify_otus_experimental']

        self.input_sequences = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('seqs-to-query.fasta'))

        self.tree = Artifact.import_data(
            'Phylogeny[Rooted]', self.get_data_path('sepp-results.nwk'))

        self.taxonomy = Artifact.import_data(
            'FeatureData[Taxonomy]', self.get_data_path('ref-taxa.tsv'))

    def test_exercise_classify_otus_experimental(self):
        # The action yields exactly one artifact; its DataFrame view must
        # equal the stored expectation in 'sepp-results.tsv'.
        results = self.action(self.input_sequences, self.tree, self.taxonomy)
        (observed_artifact,) = results
        observed = observed_artifact.view(pd.DataFrame)

        expected_fp = self.get_data_path('sepp-results.tsv')
        expected_artifact = Artifact.import_data('FeatureData[Taxonomy]',
                                                 expected_fp)
        expected = expected_artifact.view(pd.DataFrame)

        assert_frame_equal(observed, expected)

    def test_mismatched_tree(self):
        # The reference tree doubles as the "wrong" tree so that no extra
        # test data has to be created; the action must raise ValueError.
        expected_msg = 'None of.*can be found.*'
        mismatched_tree = Artifact.import_data(
            'Phylogeny[Rooted]', self.get_data_path('ref-tree.nwk'))

        with self.assertRaisesRegex(ValueError, expected_msg):
            self.action(self.input_sequences, mismatched_tree, self.taxonomy)

    def test_mismatched_taxonomy(self):
        # With the alternative taxonomy file the action must raise a
        # ValueError whose message mentions one feature and the id 879972.
        expected_msg = 'Not all OTUs.*1 feature.*\n.*879972'
        mismatched_taxa = Artifact.import_data(
            'FeatureData[Taxonomy]',
            self.get_data_path('another-ref-taxa.tsv'))

        with self.assertRaisesRegex(ValueError, expected_msg):
            self.action(self.input_sequences, self.tree, mismatched_taxa)


class TestFilter(TestPluginBase):
    """Tests for the ``filter_features`` plugin action."""
    package = 'q2_fragment_insertion.tests'

    def setUp(self):
        # Shared fixtures: the action under test, a BIOM v1.0.0 feature
        # table and an insertion tree imported from the test data.
        super().setUp()
        self.action = self.plugin.actions['filter_features']

        self.table = Artifact.import_data(
            'FeatureTable[Frequency]', self.get_data_path('table.json'),
            view_type='BIOMV100Format')

        self.tree = Artifact.import_data(
            'Phylogeny[Rooted]', self.get_data_path('sepp-results.nwk'))

    def test_exercise_filter_features(self):
        # The action returns two tables (kept and removed features); the
        # total counts of each are compared against known values.
        kept_artifact, dropped_artifact = self.action(self.table, self.tree)

        kept = kept_artifact.view(biom.Table)
        dropped = dropped_artifact.view(biom.Table)

        self.assertEqual(kept.sum(), 1247)
        self.assertEqual(dropped.sum(), 1224)

    def test_filter_features_nooverlap(self):
        # The reference tree doubles as the "wrong" tree so that no extra
        # test data has to be created; the action must raise ValueError.
        expected_msg = 'Not a single fragment.*empty'
        mismatched_tree = Artifact.import_data(
            'Phylogeny[Rooted]', self.get_data_path('ref-tree.nwk'))

        with self.assertRaisesRegex(ValueError, expected_msg):
            self.action(self.table, mismatched_tree)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()