File: ecopcr.pyx

package info (click to toggle)
obitools 3.0.1~b26%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 26,756 kB
  • sloc: ansic: 24,299; python: 657; sh: 27; makefile: 21
file content (242 lines) | stat: -rwxr-xr-x 10,167 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#cython: language_level=3

from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.capi.obidms cimport OBIDMS_p
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.view import View

from libc.stdlib  cimport malloc, free
from libc.stdint  cimport int32_t

import sys
from io import BufferedWriter


__title__="in silico PCR"


# TODO: add option to output unique ids
def addOptions(parser):

    addMinimalInputOption(parser)
    addTaxonomyOption(parser)
    addMinimalOutputOption(parser)
    addNoProgressBarOption(parser)

 
    group = parser.add_argument_group('obi ecopcr specific options')

    group.add_argument('--primer1', '-F',
                       action="store", dest="ecopcr:primer1",
                       metavar='<PRIMER>',
                       type=str,
                       required=True,
                       help="Forward primer, length must be less than or equal to 32")

    group.add_argument('--primer2', '-R',
                       action="store", dest="ecopcr:primer2",
                       metavar='<PRIMER>',
                       type=str,
                       required=True,
                       help="Reverse primer, length must be less than or equal to 32")

    group.add_argument('--error', '-e',
                       action="store", dest="ecopcr:error",
                       metavar='<ERROR>',
                       default=0,
                       type=int,
                       help="Maximum number of errors (mismatches) allowed per primer. Default: 0.")

    group.add_argument('--min-length', '-l',
                       action="store", 
                       dest="ecopcr:min-length",
                       metavar="<MINIMUM LENGTH>",
                       type=int,
                       default=0,
                       help="Minimum length of the in silico amplified DNA fragment, excluding primers.")
    
    group.add_argument('--max-length', '-L',
                       action="store", 
                       dest="ecopcr:max-length",
                       metavar="<MAXIMUM LENGTH>",
                       type=int,
                       default=0,
                       help="Maximum length of the in silico amplified DNA fragment, excluding primers.")
 
    group.add_argument('--restrict-to-taxid', '-r',
                       action="append", 
                       dest="ecopcr:restrict-to-taxid",
                       metavar="<TAXID>",
                       type=int,
                       default=[],
                       help="Only the sequence records corresponding to the taxonomic group identified "
                            "by TAXID are considered for the in silico PCR. The TAXID is an integer "
                            "that can be found in the NCBI taxonomic database.")
 
    group.add_argument('--ignore-taxid', '-i',
                       action="append", 
                       dest="ecopcr:ignore-taxid",
                       metavar="<TAXID>",
                       type=int,
                       default=[],
                       help="The sequences of the taxonomic group identified by TAXID are not considered for the in silico PCR.")

    group.add_argument('--circular', '-c',
                       action="store_true", 
                       dest="ecopcr:circular",
                       default=False,
                       help="Considers that the input sequences are circular (e.g. mitochondrial or chloroplastic DNA).")

    group.add_argument('--salt-concentration', '-a',
                       action="store", 
                       dest="ecopcr:salt-concentration",
                       metavar="<FLOAT>",
                       type=float,
                       default=0.05,
                       help="Salt concentration used for estimating the Tm. Default: 0.05.")

    group.add_argument('--salt-correction-method', '-m',
                       action="store", 
                       dest="ecopcr:salt-correction-method",
                       metavar="<1|2>",
                       type=int,
                       default=1,
                       help="Defines the method used for estimating the Tm (melting temperature) between the primers and their corresponding "
                            "target sequences. SANTALUCIA: 1, or OWCZARZY: 2. Default: 1.")

    group.add_argument('--keep-primers', '-p',
                       action="store_true", 
                       dest="ecopcr:keep-primers",
                       default=False,
                       help="Whether to keep the primers attached to the output sequences (default: the primers are cut out).")

    group.add_argument('--keep-nucs', '-D',
                       action="store", 
                       dest="ecopcr:keep-nucs",
                       metavar="<N>",
                       type=int,
                       default=0,
                       help="Keeps N nucleotides on each side of the in silico amplified sequences, "
                            "not including the primers (implying that primers are automatically kept if N > 0).")

    group.add_argument('--kingdom-mode', '-k',
                       action="store_true", 
                       dest="ecopcr:kingdom-mode",
                       default=False,
                       help="Print in the output the kingdom of the in silico amplified sequences (default: print the superkingdom).")

 

def run(config):

    cdef int32_t* restrict_to_taxids_p = NULL
    cdef int32_t* ignore_taxids_p = NULL
    
    restrict_to_taxids_len = len(config['ecopcr']['restrict-to-taxid'])
    restrict_to_taxids_p = <int32_t*> malloc((restrict_to_taxids_len + 1) * sizeof(int32_t))   # +1 for the -1 flagging the end of the array
    for i in range(restrict_to_taxids_len) :
        restrict_to_taxids_p[i] = config['ecopcr']['restrict-to-taxid'][i]
    restrict_to_taxids_p[restrict_to_taxids_len] = -1

    ignore_taxids_len = len(config['ecopcr']['ignore-taxid'])
    ignore_taxids_p = <int32_t*> malloc((ignore_taxids_len + 1) * sizeof(int32_t))   # +1 for the -1 flagging the end of the array
    for i in range(ignore_taxids_len) :
        ignore_taxids_p[i] = config['ecopcr']['ignore-taxid'][i]
    ignore_taxids_p[ignore_taxids_len] = -1
    
    DMS.obi_atexit()
    
    logger("info", "obi ecopcr")

    # Open the input: only the DMS
    input = open_uri(config['obi']['inputURI'],
                     dms_only=True)
    if input is None:
        raise Exception("Could not read input")
    i_dms = input[0]
    i_dms_name = input[0].name
    i_view_name = input[1]

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      dms_only=True)
    if output is None:
        raise Exception("Could not create output")
    o_dms = output[0]
    output_0 = output[0]
    o_dms_name = output[0].name
    o_view_name = output[1]

    # Open the taxonomy DMS
    taxdms = open_uri(config['obi']['taxoURI'],
                     dms_only=True)
    if taxdms is None:
        raise Exception("Could not open taxonomy DMS")
    tax_dms = taxdms[0]
    tax_dms_name = taxdms[0].name
    
    # Read taxonomy name    
    taxonomy_name = config['obi']['taxoURI'].split("/")[-1]   # Robust in theory

    # If stdout output create a temporary view in the input dms that will be deleted afterwards.
    if type(output_0)==BufferedWriter:
        o_dms = i_dms
        o_view_name = b"temp"
        while o_view_name in i_dms:   # Making sure view name is unique in input DMS
            o_view_name = o_view_name+b"_"+str2bytes(str(i))
            i+=1
    
    # Save command config in View comments
    command_line = " ".join(sys.argv[1:])
    input_dms_name=[i_dms_name]
    input_view_name= [i_view_name]
    input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
    input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])

    comments = View.print_config(config, "ecopcr", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)

    # TODO: primers in comments?

    if obi_ecopcr(i_dms.name_with_full_path, tobytes(i_view_name), 
                  tax_dms.name_with_full_path, tobytes(taxonomy_name), \
                  o_dms.name_with_full_path, tobytes(o_view_name), comments, \
                  tobytes(config['ecopcr']['primer1']), tobytes(config['ecopcr']['primer2']), \
                  config['ecopcr']['error'], \
                  config['ecopcr']['min-length'], config['ecopcr']['max-length'], \
                  restrict_to_taxids_p, ignore_taxids_p, \
                  config['ecopcr']['circular'], config['ecopcr']['salt-concentration'], config['ecopcr']['salt-correction-method'], \
                  config['ecopcr']['keep-nucs'], config['ecopcr']['keep-primers'], config['ecopcr']['kingdom-mode']) < 0:
        raise Exception("Error running ecopcr")

    # Save command config in DMS comments
    o_dms.record_command_line(command_line)

    free(restrict_to_taxids_p)
    free(ignore_taxids_p)

    # stdout output: write to buffer
    if type(output_0)==BufferedWriter:
        logger("info", "Printing to output...")
        o_view = o_dms[o_view_name]
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[o_view_name]), file=sys.stderr)

    # If stdout output, delete the temporary result view in the input DMS
    if type(output_0)==BufferedWriter:
        View.delete_view(i_dms, o_view_name)

    i_dms.close(force=True)
    o_dms.close(force=True)

    logger("info", "Done.")