1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import skbio
from qiime2.sdk import Artifact
from qiime2.plugins.feature_classifier.actions import extract_reads
from q2_types.feature_data._format import DNAFASTAFormat
from . import FeatureClassifierTestPluginBase
class CutterTests(FeatureClassifierTestPluginBase):
    """Tests for the ``extract_reads`` action.

    Each test runs ``extract_reads`` on a small FASTA fixture with a fixed
    primer pair and either compares the extracted reads against
    ``self.amplicons`` or checks that an invalid/over-restrictive parameter
    combination raises the expected error.
    """

    package = 'q2_feature_classifier.tests'

    def setUp(self):
        super().setUp()
        self.sequences = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('dna-sequences.fasta'))
        self.mixed_sequences = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('dna-sequences-mixed.fasta'))
        self.f_primer = 'AGAGA'
        self.r_primer = 'GCTGC'
        # Reads expected from the fixtures, in output order.
        self.amplicons = ['ACGT', 'AAGT', 'ACCT', 'ACGG', 'ACTT']

    def _test_results(self, results, expected=None):
        """Assert that the extracted reads match the expected sequences.

        Parameters
        ----------
        results
            The object returned by ``extract_reads``; its ``reads`` artifact
            is viewed as ``DNAFASTAFormat`` and parsed with scikit-bio.
        expected : list of str, optional
            Sequences to compare against, in output order. Defaults to
            ``self.amplicons``.
        """
        if expected is None:
            expected = self.amplicons
        fasta_path = str(results.reads.view(DNAFASTAFormat))
        # NOTE(review): reads are compared positionally, so a result holding
        # fewer reads than ``expected`` still passes; only surplus reads
        # fail (IndexError). Confirm the fixture sizes before tightening this
        # to an exact-length comparison.
        for i, result in enumerate(skbio.io.read(fasta_path, format='fasta')):
            self.assertEqual(str(result), expected[i])

    def test_extract_reads_expected(self):
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4)
        self._test_results(results)

    def test_extract_reads_expected_forward(self):
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4, read_orientation='forward')
        self._test_results(results)

    def test_extract_mixed(self):
        results = extract_reads(
            self.mixed_sequences, f_primer=self.f_primer,
            r_primer=self.r_primer, min_length=4)
        self._test_results(results)

    def test_extract_reads_expected_reverse(self):
        reverse_sequences = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('dna-sequences-reverse.fasta'))
        results = extract_reads(
            reverse_sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4, read_orientation='reverse')
        self._test_results(results)

    def test_extract_reads_manual_batch_size(self):
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4, batch_size=10)
        self._test_results(results)

    def test_extract_reads_two_jobs(self):
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4, n_jobs=2)
        self._test_results(results)

    def test_extract_reads_expected_degenerate_primers(self):
        # IUPAC codes: W = A/T, S = C/G.
        degenerate_f_primer = 'WWWWW'
        degenerate_r_primer = 'SSSSS'
        degenerate_sequences = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('dna-sequences-degenerate-primers.fasta'))
        results = extract_reads(
            degenerate_sequences, f_primer=degenerate_f_primer,
            r_primer=degenerate_r_primer, min_length=4)
        self._test_results(results)

    def test_extract_reads_expected_trim_right(self):
        """Tests expected behavior of trim_right option"""
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=3, trim_right=1)
        # Each expected amplicon loses its final base.
        self._test_results(
            results, expected=[amplicon[:-1] for amplicon in self.amplicons])

    def test_extract_reads_fail_identity(self):
        # presumably the fixture primers are not exact matches, so demanding
        # identity=1 leaves no reads -- verify against the fixture.
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
                min_length=4, identity=1)

    def test_extract_reads_fail_min_length(self):
        # The expected amplicons are all 4 nt long.
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
                min_length=5)

    def test_extract_reads_fail_max_length(self):
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
                max_length=1)

    def test_extract_reads_fail_trim_left_entire_read(self):
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
                trim_left=4)

    def test_extract_reads_fail_trim_right_entire_read(self):
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
                trim_right=4)

    def test_extract_reads_fail_trim_both_entire_read(self):
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
                trim_left=2, trim_right=2)

    def test_extract_reads_fail_min_len_greater_than_trunc_len(self):
        # min_length is left at its default here; presumably that default
        # exceeds trunc_len=1 -- verify against the action's signature.
        with self.assertRaisesRegex(ValueError, "minimum length setting"):
            extract_reads(
                self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
                trunc_len=1)
|