File: plugin_setup.py

package info (click to toggle)
q2-fragment-insertion 2024.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 776 kB
  • sloc: python: 2,004; makefile: 32; sh: 13
file content (176 lines) | stat: -rw-r--r-- 7,943 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import importlib

import qiime2.plugin
from qiime2.plugin import Citations
from q2_types.feature_data import FeatureData, Sequence, Taxonomy
from q2_types.feature_table import FeatureTable, Frequency
from q2_types.tree import Phylogeny, Rooted

import q2_fragment_insertion
from q2_fragment_insertion._type import Placements, SeppReferenceDatabase
from q2_fragment_insertion._format import (
    PlacementsFormat, PlacementsDirFmt, SeppReferenceDirFmt, RAxMLinfoFormat)


# Citation entries read from 'citations.bib' inside the q2_fragment_insertion
# package; the resulting object is handed to the plugin constructor below.
citations = Citations.load(
    'citations.bib',
    package='q2_fragment_insertion',
)

# The plugin object on which every method, format and semantic type in this
# module is registered.
plugin = qiime2.plugin.Plugin(
    # Identity of the plugin and of the Python package that implements it.
    name='fragment-insertion',
    package='q2_fragment_insertion',
    version=q2_fragment_insertion.__version__,
    # Descriptive metadata and pointers for users.
    short_description='Plugin for extending phylogenies.',
    website='https://github.com/qiime2/q2-fragment-insertion',
    user_support_text='https://github.com/qiime2/q2-fragment-insertion/issues',
    citations=citations,
)


# Register `q2_fragment_insertion.sepp` as a plugin method.
#   inputs:     representative sequences and a SEPP reference database
#   parameters: thread count, the two SEPP subset sizes, and a debug switch
#   outputs:    a rooted phylogeny ('tree') and the 'placements'
plugin.methods.register_function(
    function=q2_fragment_insertion.sepp,
    inputs={
        'representative_sequences': FeatureData[Sequence],
        'reference_database': SeppReferenceDatabase,
    },
    parameters={
        'threads': qiime2.plugin.Threads,
        'alignment_subset_size': qiime2.plugin.Int,
        'placement_subset_size': qiime2.plugin.Int,
        'debug': qiime2.plugin.Bool,
    },
    outputs=[
        ('tree', Phylogeny[Rooted]),
        ('placements', Placements),
    ],
    input_descriptions={
        'representative_sequences': 'The sequences to insert into the '
                                    'reference tree.',
        'reference_database': 'The reference database to insert the '
                              'representative sequences into.',
    },
    parameter_descriptions={
        'threads': 'The number of threads to use. Pass 0 to use one per '
                   'available core.',
        'alignment_subset_size': 'Each placement subset is further broken '
                                 'into subsets of at most these many '
                                 'sequences and a separate HMM is trained on '
                                 'each subset.',
        # The size limit of a placement subset is expressed in sequences,
        # mirroring the wording used for 'alignment_subset_size' above.
        'placement_subset_size': 'The tree is divided into subsets such that '
                                 'each subset includes at most these many '
                                 'sequences. The placement step places the '
                                 'fragment on only one subset, determined '
                                 'based on alignment scores. Further '
                                 'reading: https://github.com/smirarab/sepp/'
                                 'blob/master/tutorial/sepp-tutorial.md#sample'
                                 '-datasets-default-parameters.',
        'debug': 'Collect additional run information to STDOUT for debugging. '
                 'Temporary directories will not be removed if run fails.'
    },
    output_descriptions={
        'tree': 'The tree with inserted feature data.',
        'placements': 'Information about the feature placements within the '
                      'reference tree.',
    },
    name='Insert fragment sequences using SEPP into reference phylogenies.',
    description='Perform fragment insertion of sequences using the SEPP '
                'algorithm.',
)


# Register `q2_fragment_insertion.classify_otus_experimental` as a plugin
# method. It takes the sequences, a rooted tree and a reference taxonomy, has
# no parameters, and yields a single taxonomy output ('classification').
plugin.methods.register_function(
    function=q2_fragment_insertion.classify_otus_experimental,
    name=(
        'Experimental: Obtain taxonomic lineages, by finding closest OTU '
        'in reference phylogeny.'
    ),
    description=(
        "Experimental: Use the resulting tree from 'sepp' and find closest "
        "OTU-ID for every inserted fragment. Then, look up the reference "
        "lineage string in the reference taxonomy."
    ),
    inputs={
        'representative_sequences': FeatureData[Sequence],
        'tree': Phylogeny[Rooted],
        'reference_taxonomy': FeatureData[Taxonomy],
    },
    input_descriptions={
        'representative_sequences': (
            "The sequences used for a 'sepp' run to produce the 'tree'."
        ),
        'tree': (
            "The tree resulting from inserting fragments into a reference "
            "phylogeny, i.e. the output of function 'sepp'"
        ),
        'reference_taxonomy': (
            'Reference taxonomic table that maps every OTU-ID into a '
            'taxonomic lineage string.'
        ),
    },
    # This method exposes no tunable parameters.
    parameters={},
    parameter_descriptions={},
    outputs=[
        ('classification', FeatureData[Taxonomy]),
    ],
    output_descriptions={
        'classification': 'Taxonomic lineages for inserted fragments.',
    },
)


# Register `q2_fragment_insertion.filter_features` as a plugin method.
#   inputs:  a frequency feature table and a rooted tree
#   outputs: two frequency feature tables, 'filtered_table' and
#            'removed_table'
# The method exposes no parameters.
plugin.methods.register_function(
    function=q2_fragment_insertion.filter_features,
    inputs={
        'table': FeatureTable[Frequency],
        'tree': Phylogeny[Rooted],
    },
    input_descriptions={
        'table': 'A feature-table which needs to be filtered down to those '
                 'fragments that are contained in the tree, e.g. result of a '
                 'Deblur or DADA2 run.',
        'tree': 'The tree resulting from inserting fragments into a reference '
                'phylogeny, i.e. the output of function \'sepp\'',
    },
    parameters={},
    parameter_descriptions={},
    outputs=[
        ('filtered_table', FeatureTable[Frequency]),
        ('removed_table', FeatureTable[Frequency]),
    ],
    output_descriptions={
        'filtered_table': 'The input table minus those fragments that were '
                          'not part of the tree. This feature-table can be '
                          'used for downstream analyses like phylogenetic '
                          'alpha- or beta- diversity computation.',
        'removed_table': 'Those fragments that got removed from the input '
                         'table, because they were not part of the tree. This '
                         'table is mainly used for quality control, e.g. to '
                         'inspect the ratio of removed reads per sample from '
                         'the input table. You can ignore this table for '
                         'downstream analyses.',
    },
    name='Filter fragments in tree from table.',
    description='Filters fragments not inserted into a phylogenetic tree from '
                'a feature-table. Some fragments computed by e.g. Deblur or '
                'DADA2 are too remote to get inserted by SEPP into a '
                'reference phylogeny. To be able to use the feature-table for '
                'downstream analyses like computing Faith\'s PD or UniFrac, '
                'the feature-table must be cleared of fragments that are not '
                'part of the phylogenetic tree, because their path length can '
                'otherwise not be determined. Typically, the number of '
                'rejected fragments is low (<= 10), but it might be worth to '
                'inspect the ratio of reads assigned to those rejected '
                'fragments.',
)


# TODO: rough in method to merge database components
# TODO: rough in method to destructure database components


# Imported only for what happens at import time; the returned module object
# is discarded. (Presumably this is where the plugin's transformers get
# registered -- confirm against q2_fragment_insertion/_transformer.py.)
importlib.import_module('q2_fragment_insertion._transformer')


# File/directory formats and semantic types contributed by this plugin.
plugin.register_formats(
    PlacementsFormat,
    PlacementsDirFmt,
    RAxMLinfoFormat,
    SeppReferenceDirFmt,
)
plugin.register_semantic_types(
    Placements,
    SeppReferenceDatabase,
)

# Associate each semantic type with the directory format used for it.
plugin.register_semantic_type_to_format(
    Placements,
    artifact_format=PlacementsDirFmt,
)
plugin.register_semantic_type_to_format(
    SeppReferenceDatabase,
    artifact_format=SeppReferenceDirFmt,
)