File: ngsfilter.pyx

package info (click to toggle)
obitools 3.0.1~b26%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 26,756 kB
  • sloc: ansic: 24,299; python: 657; sh: 27; makefile: 21
file content (720 lines) | stat: -rwxr-xr-x 28,634 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
#cython: language_level=3

from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view import RollbackException, View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column, Column_line
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
from obitools3.libalign.apat_pattern import Primer_search
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.capi.apat cimport MAX_PATTERN
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.utils cimport tobytes, str2bytes

from libc.stdint cimport INT32_MAX
from functools import reduce
import math
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter


MAX_PAT_LEN = 31


__title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers"


def addOptions(parser):

    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)
    addNoProgressBarOption(parser)
    
    group = parser.add_argument_group('obi ngsfilter specific options')

    group.add_argument('-t','--info-view',
                     action="store", dest="ngsfilter:info_view",
                     metavar="<URI>",
                     type=str,
                     default=None,
                     required=True,
                     help="URI to the view containing the samples definition (with tags, primers, sample names,...).\n"
                          "\nWarning: primer lengths must be less than or equal to 32")

    group.add_argument('-R', '--reverse-reads',
                     action="store", dest="ngsfilter:reverse",
                     metavar="<URI>",
                     default=None,
                     type=str,
                     help="URI to the reverse reads if the paired-end reads haven't been aligned yet")

    group.add_argument('-u','--unidentified',
                     action="store", dest="ngsfilter:unidentified",
                     metavar="<URI>",
                     type=str,
                     default=None,
                     help="URI to the view used to store the sequences unassigned to any sample. Those sequences are untrimmed.")

    group.add_argument('--no-tags',
                     action="store_true", dest="ngsfilter:notags",
                     default=False,
                     help="Use this option if your experiment does not use tags to identify samples")
    
    group.add_argument('-e','--error',
                     action="store", dest="ngsfilter:error",
                     metavar="###",
                     type=int,
                     default=2,
                     help="Number of errors allowed for matching primers [default = 2]")


class Primer:
    
    collection={}
    
    def __init__(self, sequence, taglength, forward=True, max_errors=2, verbose=False, primer_pair_idx=0, primer_idx=0):
        '''
            
        @param sequence:
        @type sequence:
        @param direct:
        @type direct:
        '''
        
        assert len(sequence) <= MAX_PAT_LEN, "Primer %s is too long, 31 bp max" % sequence
        
        assert sequence not in Primer.collection        \
            or Primer.collection[sequence]==taglength,  \
            "Primer %s must always be used with tags of the same length" % sequence
            
        Primer.collection[sequence]=taglength
        
        self.primer_pair_idx = primer_pair_idx
        self.primer_idx = primer_idx
        self.is_revcomp = False
        self.revcomp = None
        self.raw=sequence
        self.sequence = Nuc_Seq(b"primer", sequence)        
        self.lseq = len(self.sequence)
        self.max_errors=max_errors
        self.taglength=taglength
        self.forward = forward
        self.verbose=verbose
        
    def reverse_complement(self):
        p = Primer(self.raw,
                   self.taglength,
                   not self.forward,
                   verbose=self.verbose,
                   max_errors=self.max_errors,
                   primer_pair_idx=self.primer_pair_idx,
                   primer_idx=self.primer_idx)
        p.sequence=p.sequence.reverse_complement
        p.is_revcomp = True
        p.revcomp = None
        return p
    
    def __hash__(self):
        return hash(str(self.raw))
    
    def __eq__(self,primer):
        return self.raw==primer.raw 
    
    def __call__(self, sequence, same_sequence=False, pattern=0, begin=0):
        
        if len(sequence) <= self.lseq:
            return None
                
        ali = self.aligner.search_one_primer(sequence.seq, 
                                             self.primer_pair_idx, 
                                             self.primer_idx, 
                                             reverse_comp=self.is_revcomp, 
                                             same_sequence=same_sequence,
                                             pattern_ref=pattern,
                                             begin=begin)
        
        if ali is None:  # no match
            return None 
            
        errors, start = ali.first_encountered()
                
        if errors <= self.max_errors:
            end = start + self.lseq
            if self.taglength is not None:
                if self.sequence.is_revcomp:
                    if (len(sequence)-end) >= self.taglength:
                        tag_start = len(sequence) - end - self.taglength
                        tag = sequence.reverse_complement[tag_start:tag_start+self.taglength].seq
                    else:
                        tag=None
                else:
                    if start >= self.taglength:
                        tag = tobytes((sequence[start - self.taglength:start].seq).lower())  # turn back to lowercase because apat turned to uppercase
                    else:
                        tag=None
            else:
                tag=None
            
            return errors,start,end,tag

        return None 
    
    
    def __str__(self):
        return "%s: %s" % ({True:'D',False:'R'}[self.forward],self.raw)
    
    __repr__=__str__


cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
    infos = {}
    primer_list = []
    i=0
    for p in info_view:
        
        # Check primer length: should not be longer than 32, the max allowed by the apat lib
        if len(p[b'forward_primer']) > 32:
            raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
        if len(p[b'reverse_primer']) > 32:
            raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
        
        forward=Primer(p[b'forward_primer'],
                       len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
                       True,
                       max_errors=max_errors,
                       verbose=verbose,
                       primer_pair_idx=i,
                       primer_idx=0)
        
        fp = infos.get(forward,{})
        infos[forward]=fp
        
        reverse=Primer(p[b'reverse_primer'],
                       len(p[b'reverse_tag']) if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None,
                       False,
                       max_errors=max_errors,
                       verbose=verbose,
                       primer_pair_idx=i,
                       primer_idx=1)

        primer_list.append((p[b'forward_primer'], p[b'reverse_primer']))
        
        rp = infos.get(reverse,{})
        infos[reverse]=rp
        
        if not_aligned:
            cf=forward
            cr=reverse
            
            cf.revcomp = forward.reverse_complement()
            cr.revcomp = reverse.reverse_complement()

            dpp=fp.get(cr,{})
            fp[cr]=dpp
            
            rpp=rp.get(cf,{})
            rp[cf]=rpp
        
        else:
            cf=forward.reverse_complement()
            cr=reverse.reverse_complement()
            
            dpp=fp.get(cr,{})
            fp[cr]=dpp
             
            rpp=rp.get(cf,{})
            rp[cf]=rpp

        tags = (p[b'forward_tag'] if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
                p[b'reverse_tag'] if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None)
        
        if tags != (None, None):
            assert tags not in dpp, \
               "Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse)
        
        # Save additional data
        special_keys = [b'forward_primer', b'reverse_primer', b'forward_tag', b'reverse_tag']
        data={}
        for key in p:
            if key not in special_keys:
                data[key] = p[key]
                            
        dpp[tags] = data
        rpp[tags] = data
        
        i+=1
        
    return infos, primer_list
    

cdef tuple annotate(sequences, infos, no_tags, verbose=False):
        
    def sortMatch(match):
        if match[1] is None:
            return INT32_MAX
        else:
            return match[1][1]

    def sortReverseMatch(match):
        if match[1] is None:
            return -1
        else:
            return match[1][1]
    
    not_aligned = len(sequences) > 1
    sequences[0] = sequences[0].clone()
    
    if not_aligned:
        sequences[0][b"R1_parent"] = sequences[0].id
        sequences[0][b"R2_parent"] = sequences[1].id

    if not_aligned:
        sequences[1] = sequences[1].clone()
        sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq        # used by alignpairedend tool
        sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality     # used by alignpairedend tool

    for seq in sequences:
        if hasattr(seq, "quality_array") and seq.quality_array is not None: 
            q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array),0)/len(seq.quality_array)*10
            seq[b'avg_quality']=q
            q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[0:10]),0)
            seq[b'head_quality']=q
            if len(seq.quality_array[10:-10]) :
                q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[10:-10]),0)/len(seq.quality_array[10:-10])*10
                seq[b'mid_quality']=q
            q = -reduce(lambda x,y:x+y,(math.log10(z) for z in seq.quality_array[-10:]),0)
            seq[b'tail_quality']=q
    
    # Try direct matching:
    directmatch = []
    for seq in sequences:
        new_seq = True
        pattern = 0
        for p in infos:
            if pattern == MAX_PATTERN:
                new_seq = True
                pattern = 0
            # Saving original primer as 4th member of the tuple to serve as correct key in infos dict even if it might be reversed complemented (not here)
            directmatch.append((p, p(seq, same_sequence=not new_seq, pattern=pattern), seq, p))
            new_seq = False
            pattern+=1
        
    # Choose match closer to the start of (one of the) sequence(s)
    directmatch = sorted(directmatch, key=sortMatch)
    all_direct_matches = directmatch
    directmatch = directmatch[0] if directmatch[0][1] is not None else None

    if directmatch is None:
        if not_aligned:
            sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq             # used by alignpairedend tool
            sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality     # used by alignpairedend tool
        sequences[0][b'error']=b'No primer match'
        return False, sequences[0]

    if id(directmatch[2]) == id(sequences[0]):
        first_match_first_seq = True
    else:
        first_match_first_seq = False
       
    match = directmatch[2][directmatch[1][1]:directmatch[1][2]]
    
    if not not_aligned:
        sequences[0][b'seq_length_ori']=len(sequences[0])
    
    if not not_aligned or first_match_first_seq:
        sequences[0] = sequences[0][directmatch[1][2]:]
    else:
        sequences[1] = sequences[1][directmatch[1][2]:]
        sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq      # used by alignpairedend tool
        sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality   # used by alignpairedend tool
    
    if directmatch[0].forward:
        sequences[0][b'direction']=b'forward'
        sequences[0][b'forward_errors']=directmatch[1][0]
        sequences[0][b'forward_primer']=directmatch[0].raw
        sequences[0][b'forward_match']=match.seq
        
    else:
        sequences[0][b'direction']=b'reverse'
        sequences[0][b'reverse_errors']=directmatch[1][0]
        sequences[0][b'reverse_primer']=directmatch[0].raw
        sequences[0][b'reverse_match']=match.seq

    # Keep only paired reverse primer
    infos = infos[directmatch[0]]
    reverse_primer = list(infos.keys())[0]
    direct_primer = directmatch[0]
        
    # If not aligned, look for other match in already computed matches (choose the one that makes the biggest amplicon)
    if not_aligned:
        i=1
        # TODO comment
        while i<len(all_direct_matches) and \
            (all_direct_matches[i][1] is None or \
             all_direct_matches[i][0].forward == directmatch[0].forward or \
             all_direct_matches[i][0] == directmatch[0] or \
             reverse_primer != all_direct_matches[i][0]) :
            i+=1
        if i < len(all_direct_matches):
            reversematch = all_direct_matches[i]
        else:
            reversematch = None
        
    # Cut reverse primer out of 1st matched seq if it contains it, because if it's also in the other sequence, the next step will "choose" only the one on the other sequence
    if not_aligned:
        # do it on same seq
        if first_match_first_seq:
            r = reverse_primer.revcomp(sequences[0])
        else:
            r = reverse_primer.revcomp(sequences[1]) 
        if r is not None: # found
            if first_match_first_seq :
                sequences[0] = sequences[0][:r[1]]
            else:
                sequences[1] = sequences[1][:r[1]]
                sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq      # used by alignpairedend tool
                sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality   # used by alignpairedend tool
        # do the same on the other seq
        if first_match_first_seq: 
            r = direct_primer.revcomp(sequences[1])
        else:
            r = direct_primer.revcomp(sequences[0])
        if r is not None: # found
            if first_match_first_seq:
                sequences[1] = sequences[1][:r[1]]
            else:
                sequences[0] = sequences[0][:r[1]] 
                sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq
                sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality
    
    
    # Look for other primer in the other direction on the sequence, or
    # If sequences are not already aligned and reverse primer not found in most likely sequence (the one without the forward primer), try matching on the same sequence than the first match (primer in the other direction)
    if not not_aligned or (not_aligned and (reversematch is None or reversematch[1] is None)):
        if not_aligned and first_match_first_seq:
            seq_to_match = sequences[1]
        else:
            seq_to_match = sequences[0]
        reversematch = []
        # Compute begin
        #begin=directmatch[1][2]+1  # end of match + 1 on the same sequence -- No, already cut out forward primer        
        # Try reverse matching on the other sequence:
        new_seq = True
        pattern = 0
        for p in infos:
            if pattern == MAX_PATTERN:
                new_seq = True
                pattern = 0
            if not_aligned:
                primer=p.revcomp
            else:
                primer=p
            # Saving original primer as 4th member of the tuple to serve as correct key in infos dict even if it might have been reversed complemented
            # (3rd member already used by directmatch)
            reversematch.append((primer, primer(seq_to_match, same_sequence=not new_seq, pattern=pattern, begin=0), None, p))
            new_seq = False
            pattern+=1
        # Choose match closer to the end of the sequence
        reversematch = sorted(reversematch, key=sortReverseMatch, reverse=True)
        all_reverse_matches = reversematch
        reversematch = reversematch[0] if reversematch[0][1] is not None else None
        
    if reversematch is None and None not in infos:
        if directmatch[0].forward:
            message = b'No reverse primer match'
        else:
            message = b'No direct primer match'
        sequences[0][b'error']=message
        return False, sequences[0]
    
    if reversematch is None:
        sequences[0][b'status']=b'partial'
        
        if directmatch[0].forward:
            tags=(directmatch[1][3],None)
        else:
            tags=(None,directmatch[1][3])
            
        samples = infos[None]
        
    else:
        sequences[0][b'status']=b'full'
        
        if not not_aligned or first_match_first_seq:
            match = sequences[0][reversematch[1][1]:reversematch[1][2]]
        else:
            match = sequences[1][reversematch[1][1]:reversematch[1][2]]
        
        match = match.reverse_complement
        
        if not not_aligned:
            sequences[0] = sequences[0][0:reversematch[1][1]]
        elif first_match_first_seq:
            sequences[1] = sequences[1][reversematch[1][2]:]
            if not directmatch[0].forward:
                sequences[1] = sequences[1].reverse_complement
            sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq      # used by alignpairedend tool
            sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality   # used by alignpairedend tool
        else:
            sequences[0] = sequences[0][reversematch[1][2]:]
            
        if directmatch[0].forward:
            tags=(directmatch[1][3], reversematch[1][3])
            sequences[0][b'reverse_errors'] = reversematch[1][0]
            sequences[0][b'reverse_primer'] = reversematch[0].raw
            sequences[0][b'reverse_match'] = match.seq
        
        else:
            tags=(reversematch[1][3], directmatch[1][3])
            sequences[0][b'forward_errors'] = reversematch[1][0]
            sequences[0][b'forward_primer'] = reversematch[0].raw
            sequences[0][b'forward_match'] = match.seq
                
        if tags[0] is not None:
            sequences[0][b'forward_tag'] = tags[0]
        if tags[1] is not None:
            sequences[0][b'reverse_tag'] = tags[1]

        samples = infos[reversematch[3]]
        
    if not directmatch[0].forward:
        sequences[0] = sequences[0].reverse_complement
        sequences[0][b'reversed'] = True   # used by the alignpairedend tool (in kmer_similarity.c)
    else:
        sequences[0][b'reversed'] = False   # used by the alignpairedend tool (in kmer_similarity.c)

    sample=None
    if not no_tags:
        if tags[0] is not None:                                    # Direct  tag known
            if tags[1] is not None:                                # Reverse tag known
                sample = samples.get(tags, None)             
            else:                                                   # Only direct tag known
                s=[samples[x] for x in samples if x[0]==tags[0]]
                if len(s)==1:
                    sample=s[0]
                elif len(s)>1:
                    sequences[0][b'error']=b'Did not found reverse tag'
                    return False, sequences[0]
                else:
                    sample=None
        else: 
            if tags[1] is not None:                                 # Only reverse tag known
                s=[samples[x] for x in samples if x[1]==tags[1]]
                if len(s)==1:
                    sample=s[0]
                elif len(s)>1:
                    sequences[0][b'error']=b'Did not found forward tag'
                    return False, sequences[0]
                else:
                    sample=None
        
        if sample is None:
            sequences[0][b'error']=b"No sample with that tag combination"
            return False, sequences[0]
    
        sequences[0].update(sample)
    
    if not not_aligned:
        sequences[0][b'seq_length']=len(sequences[0])
    
    return True, sequences[0]


def run(config):
        
    DMS.obi_atexit()
    
    logger("info", "obi ngsfilter")

    assert config['ngsfilter']['info_view'] is not None, "Option -t must be specified"

    # Open the input
    
    forward = None
    reverse = None
    input = None
    not_aligned = False
    
    input = open_uri(config['obi']['inputURI'])
    if input is None:
        raise Exception("Could not open input reads")
    if input[2] != View_NUC_SEQS:
        raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
    i_dms = input[0]
    
    if "reverse" in config["ngsfilter"]:
        
        forward = input[1]        
        
        rinput = open_uri(config["ngsfilter"]["reverse"])
        if rinput is None:
            raise Exception("Could not open reverse reads")
        if rinput[2] != View_NUC_SEQS:
            raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
        
        reverse = rinput[1]
    
        if len(forward) != len(reverse):
            raise Exception("Error: the number of forward and reverse reads are different")
        
        entries = [forward, reverse]
        not_aligned = True

        input_dms_name = [forward.dms.name, reverse.dms.name]
        input_view_name = [forward.name, reverse.name]
    
    else:
        entries = input[1]
        input_dms_name = [entries.dms.name]
        input_view_name = [entries.name]

    
    if not_aligned:
        entries_len = len(forward)
    else:
        entries_len = len(entries)
    
    # Open the output
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=View_NUC_SEQS)
    
    if output is None:
        raise Exception("Could not create output view")
    
    o_dms = output[0]
    output_0 = output[0]
    o_view = output[1]
    
    # If stdout output, create a temporary view in the input dms that will be deleted afterwards.
    if type(output_0)==BufferedWriter:
        o_dms = i_dms
        o_view_name = b"temp"
        while o_view_name in i_dms:   # Making sure view name is unique in input DMS
            o_view_name = o_view_name+b"_"+str2bytes(str(i))
            i+=1
        o_view = View_NUC_SEQS.new(i_dms, o_view_name)

    # Open the view containing the informations about the tags and the primers
    info_input = open_uri(config['ngsfilter']['info_view'])
    if info_input is None:
        raise Exception("Could not read the view containing the informations about the tags and the primers")
    info_view = info_input[1]
    input_dms_name.append(info_input[0].name)   
    input_view_name.append(info_input[1].name)

    # Open the unidentified view
    if 'unidentified' in config['ngsfilter'] and config['ngsfilter']['unidentified'] is not None:  # TODO keyError if undefined problem
        unidentified_input = open_uri(config['ngsfilter']['unidentified'],
                                      input=False,
                                      newviewtype=View_NUC_SEQS)
        if unidentified_input is None:
            raise Exception("Could not open the view containing the unidentified reads")
        unidentified = unidentified_input[1]
    else:
        unidentified = None
        
    # Initialize the progress bar
    if config['obi']['noprogressbar'] == False:
        pb = ProgressBar(entries_len, config)
    else:
        pb = None

    # Check and store primers and tags
    try:
        infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned)   # TODO obi verbose option
    except RollbackException, e:
        if unidentified is not None:
            raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
        else:
            raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
    
    aligner = Primer_search(primer_list, config['ngsfilter']['error'])
    
    for p in infos:
        p.aligner = aligner
        for paired_p in infos[p]:
            paired_p.aligner = aligner
            if paired_p.revcomp is not None:
                paired_p.revcomp.aligner = aligner
    
    if not_aligned:   # create columns used by alignpairedend tool
        Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
        Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
        
        if unidentified is not None:
            Column.new_column(unidentified, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
            Column.new_column(unidentified, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=unidentified[REVERSE_SEQUENCE_COLUMN].version)
    
    g = 0
    u = 0
    i = 0
    no_tags = config['ngsfilter']['notags']
    try:
        for i in range(entries_len):
            PyErr_CheckSignals()
            if pb is not None:
                pb(i)
            if not_aligned:
                modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
            else:
                modseq = [Nuc_Seq.new_from_stored(entries[i])]
            good, oseq = annotate(modseq, infos, no_tags)
            if good:
                o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
                g+=1
            elif unidentified is not None:
                # Untrim sequences (put original back)
                if len(modseq) > 1:
                    oseq[REVERSE_SEQUENCE_COLUMN] = reverse[i].seq
                    oseq[REVERSE_QUALITY_COLUMN] = reverse[i].quality
                    unidentified[u].set(oseq.id, forward[i].seq, definition=oseq.definition, quality=forward[i].quality, tags=oseq)
                else:
                    unidentified[u].set(oseq.id, entries[i].seq, definition=oseq.definition, quality=entries[i].quality, tags=oseq)
                u+=1
    except Exception, e:
        if unidentified is not None:
            raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
        else:
            raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
    
    if pb is not None:
        pb(i, force=True)
        print("", file=sys.stderr)

    # Save command config in View and DMS comments
    command_line = " ".join(sys.argv[1:])
    o_view.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    if unidentified is not None:
        unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
        # Add comment about unidentified seqs
        unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
    o_dms.record_command_line(command_line)
    
    # stdout output: write to buffer
    if type(output_0)==BufferedWriter:
        logger("info", "Printing to output...")
        o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
        o_view.close()

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)

    # If stdout output, delete the temporary result view in the input DMS
    if type(output_0)==BufferedWriter:
        View.delete_view(i_dms, o_view_name)

    i_dms.close(force=True)
    o_dms.close(force=True)
    info_input[0].close(force=True)
    if unidentified is not None:
        unidentified_input[0].close(force=True)
    aligner.free()

    logger("info", "Done.")