File: Support.py

package info (click to toggle)
pbsuite 15.8.24%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 14,512 kB
  • ctags: 1,951
  • sloc: python: 10,962; sh: 147; xml: 21; makefile: 14
file content (870 lines) | stat: -rwxr-xr-x 34,218 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
#!/usr/bin/python
import re, os, sys, logging, json, copy
from bisect import bisect_left, bisect_right
from optparse import OptionParser
from collections import defaultdict

from pbsuite.utils.setupLogging import *
from pbsuite.utils.FileHandlers import *

from networkx import Graph

USAGE = """Support.py <alignmentFile> <gapInfo> <outFile>

Parses an .m4 or .m5 alignment file and determines which gaps in gapInfo are
supported by reads. As well as Results reported in outFile"""

"""
TODO

Need ability to place scaffolds within captured gaps
    >scaf1
    ====.....====
    >scaf2
    ---

    Actual Genome

    ====.---.====
    That plus the pacbio evidence could polish off that entire gap
"""
#Max Distance a read's alignment can stop from a gap and still be considered 
#for support of the gap
MAXFLANK = 50
#Min number of bases a read needs to reach into a gap to be considered for 
#support of the gap
MINCOVERS = 25

SUPPORTFLAGS = enum(none =    0, \
                    left =    1, \
                    right =   2, \
                    span =    3, \
                    contain = 8  )

ALIGNFLAGS = enum(uniqueMapping = 1, \
                  multiMapping  = 2, \
                  lqpa          = 4, \
                  mostAccurate  = 8, \
                  bestScore     = 16 )

class AlignmentConnector():
    """
    Helper object that will group hits, connect alignments, identify 
    missed adapters, test if a read maps concordantly or discordantly, 
    and test if alignments are within regions
    
    Takes a set of alignments, and parses multi-mapping information
    in order to produce a single alignment.
    """
    
    def __init__(self):
        """
        Only has static methods
        """
        pass
    
    def parseAlignments(self, alignments, minMapq, idAdapters = True):
        """
        Given a set of alignments (from one or more read) put them through the paces
        and return a list of lists of alignments.
        """
        groups = self.groupReadHits(alignments, minMapq)
        
        ret = {}
        for key in groups:
            r = self.connect(groups[key])
            if idAdapters and self.idAdapters(r):
                # if it has adapters, I'll need to 
                # separate the pieces -- idAdapters renames them
                for i in r:
                    logging.debug("About to set qname on %s" % (i))
                    logging.debug(r)
                    try:
                        i.qname = i.qname + "##%d#%d##" % i.trim
                    except TypeError:
                        #didn't get trimmed (happens with 3 subreads per
                        #a single read - only one missed adapter)
                        pass
                    ret[i.qname] = [i]
            else:
                ret[key] = r
        
        return ret.values()
            
    def connect(self, hits, sameStrand=False, sameTar=False):
        """
        Standard procedure to take a group of alignments and 
        connect concordant hits while also using ALIGNFLAGS to characterize
        each read

        Input: a list of hits from a read
        Return: Concordant Reads classified

        Use sameStrand and sameTar(get) to restrict the definition of 
        concordant
        """
        ret = copy.deepcopy(hits)
        con = True
        for h in ret:
            if h.flag != 0:
                con = False
        
        if con:#Don't group compare twice
            self.groupComparison(ret)
        
        ret = self.untangle(ret)
        return ret
        
    def groupComparison(self, hits, flag=True):
        """
        Find the best LQPA, ACC, and HISCORE
        if flag == False:
            we don't change the flag, we just return a list of lists
            [ [bestScore], [mostAccurate], [lqpa] ]
        """
        #The number behind the best; the list of bests (Cause there can be ties)
        LQPA = hits[0].queryPctAligned; lqpas = [hits[0]]
        ACC = hits[0].pctsimilarity; accs = [hits[0]]
        SCORE = hits[0].score; scores = [hits[0]]
        #Find the bests
        for align in hits[1:]:
            if align.queryPctAligned == LQPA:
                lqpas.append(align)
            elif align.queryPctAligned > LQPA:
                LQPA = align.queryPctAligned
                lqpas = [align]
                
            if align.pctsimilarity == ACC:
                accs.append(align)
            elif align.pctsimilarity > ACC:
                ACC = align.pctsimilarity
                accs = [align]
            
            if align.score == SCORE:
                scores.append(align)
            elif align.score < SCORE:
                SCORE = align.score
                scores = [align]
        #Set the apropriate flags
        if not flag:
            return [scores, accs, lqpas]
        for align in lqpas:
            align.flag += ALIGNFLAGS.lqpa 
        for align in accs:
            align.flag += ALIGNFLAGS.mostAccurate
        for align in scores:
            align.flag += ALIGNFLAGS.bestScore
    
    def mappingType(self, hits):
        """
        Sets Reads flag based on Alignment 
        "UniqueMapping", "MultiMapping"
        """
        logging.debug("Determining Read's mapping type from hits")
        fInc = 0
        if len(hits) == 1:
            fInc = ALIGNFLAGS.uniqueMapping
        else:
            fInc = ALIGNFLAGS.multiMapping
        for i in hits:
            i.flag += fInc

    def isQueryConcordant(self, A, B, sameStrand=False):
        """
        Takes two reads (A,B) and checks their mapping concordancy
        meaning the query alignment positions are reasonable 
        given a sequencing read
        
        for example:
            concordant
                q ===---   ---===
                t ===============
            !concordant
                q ---===   ---===
                t ===============

        sameStrand == True:
            read's hits need to be on the sameStrand

        Return  0 for False
        Return  1 for A is upstream (5' - 3') of B
        Return -1 for B is upstream 
        """
        if sameStrand and A.tstrand != B.tstrand:
            return 0
        
        if A.qstart < A.qend < B.qstart < B.qend:#
            return 1
        if B.qstart < B.qend < A.qstart < A.qend:
            return -1
        
        return 0
        
    def isTargetConcordant(self, A, B, sameStrand=False, sameTar=False):
        """
        Takes two alignments and checks if they're concordant. 
        if sameStrand:
            hits must be on the same strand
        if sameTar:
            hits must be on the same target

        Return  0 for False
        Return  1 for A is upstream (5' - 3') of B
        Return -1 for B is upstream 

        Question:
            Can you have hits that are target concordant but not 
            query concordant? 

            queryA ==-
            queryB  -==
            target ----
            A deletion?
        """
        if sameStrand and a.tstrand != B.tstrand:
            return 0
            
        if sameTar and A.tname != B.tname:
            return 0
            
        if A.tend < B.tstart and A.qend < B.qstart:
            return 1
        if B.tend < A.tstart and B.qend < A.qstart:
            return -1
        
        return 0
    
    def extendsTarget(self, alignment, maxFlank=50, minCovers=25):
        """
        Checks to see if a read extends it's target
        Returns the direction to which the target is 
        extended using SUPPORTFLAGS
        """
        ret = SUPPORTFLAGS.none
        logging.debug("maxFlank %d - minCovers %d" % (maxFlank, minCovers))
        logging.debug("Checking 5End of Scaff %s %s" % (alignment.tname, alignment.qname))
        ret += self.supportsRegion(alignment, alignment.tname, \
                                         -sys.maxint, 0, maxFlank, minCovers)
        logging.debug("Checking 3End of Scaff %s %s" % (alignment.tname, alignment.qname))
        ret += self.supportsRegion(alignment, alignment.tname, \
                                      alignment.tseqlength, sys.maxint, maxFlank, minCovers)
        
        #Orientation correct - how we're extending target
        if ret == SUPPORTFLAGS.right:
            ret = SUPPORTFLAGS.left
        elif ret == SUPPORTFLAGS.left:
            ret = SUPPORTFLAGS.right
        
        return ret

    def supportsRegion(self, alignment, rName, rStart, rEnd, maxFlank=50, minCovers=25):
        """
        Checks if read's alignment supports a region. 
        if r(egion)Name/Start/End is not specified the target sequence's boundaries are used
        
        The region of support can be extended using:
            maxFlank  = Max distance from end query alignment can stop to still be a candiate
            minCovers = Min amount of sequence that extends the target
        
        Uses the SUPPORTFLAGS eunum for return values
        Returns "left" for covering the region's left most point
        Returns "right" for covering the region's right most point
        Returns "span" for covering left and right
        Returns "contain" for being within left and right point
        Returns "none" if no relation (different targets - contained - otherwise)
        """

        ret = SUPPORTFLAGS.none
        
        if rName != alignment.tname:
            return ret
        
        if alignment.tstrand == "0":
            #Moving into Region from left
            #Meaning we extend the region into the left (to the right)
            distanceFromEnd = rStart - alignment.tend
            remainingReadSeq3 = (alignment.qseqlength - alignment.qend) - minCovers
            logging.debug("+ Strand on " + alignment.qname)
            logging.debug("LeftDist %d remainSeq %d" % (distanceFromEnd, remainingReadSeq3))
            if distanceFromEnd >= 0 and \
               distanceFromEnd < remainingReadSeq3 and \
               distanceFromEnd <= maxFlank :
                #Positive Strand Maps on Left Contig and enters gap     
                ret += SUPPORTFLAGS.left
                logging.debug("left")
            
            #moving out of Region to right
            #meaning we extend into the right (to the left)
            distanceFromBeginning = alignment.tstart - rEnd
            remainingReadSeq5 = alignment.qstart - minCovers
            logging.debug("RightDist %d remainSeq %d" % (distanceFromBeginning,remainingReadSeq5))
            if distanceFromBeginning >= 0 and \
               distanceFromBeginning < remainingReadSeq5 and \
               distanceFromBeginning <= maxFlank :
                #Positive Strand Maps on Right Contig and Exits Gap
                ret += SUPPORTFLAGS.right
                logging.debug("right support")
                 
            if alignment.tstart <= (rStart - maxFlank) and alignment.tend >= (rEnd + maxFlank):
                ret = SUPPORTFLAGS.span
                logging.debug("span support")
            
            elif alignment.tstart >= rStart and alignment.tend <= rEnd:
                ret = SUPPORTFLAGS.contain
                logging.debug("contain support")

        elif alignment.tstrand == "1":
            #Moving into region from left  on - strand
            #Meaning we extend to the right on + strand 
            distanceFromBeginning = alignment.tstart - rEnd
            remainingReadSeq3 = (alignment.qseqlength - alignment.qend) - minCovers
            
            logging.debug("- Strand on "+alignment.qname)
            logging.debug("RightDist %d remainSeq %d" % (distanceFromBeginning,remainingReadSeq3))
            if distanceFromBeginning >= 0 and \
               distanceFromBeginning < remainingReadSeq3 and \
               distanceFromBeginning <= maxFlank :
                ret += SUPPORTFLAGS.right
                logging.debug("right support")
            
            #Moving out of region to the right on - strand
            #Meaning we extend to the left on + strand
            distanceFromEnd = rStart - alignment.tend
            remainingReadSeq5 = alignment.qstart - minCovers
            logging.debug("LeftDist %d remainSeq %d" % (distanceFromEnd,remainingReadSeq5))
            if distanceFromEnd >= 0 and \
               distanceFromEnd < remainingReadSeq5 and \
               distanceFromEnd <= maxFlank :
                ret += SUPPORTFLAGS.left
                logging.debug("left support")
                
            if alignment.tstart <= rStart - minCovers and alignment.tend >= rEnd + minCovers:
                ret = SUPPORTFLAGS.span
                logging.debug("span support")
                
            elif alignment.tstart >= rStart and alignment.tend <= rEnd:
                ret = SUPPORTFLAGS.contain
                logging.debug("contain support")
        logging.debug("")
        return ret
        
    def groupReadHits(self, alignments, minMapq):
        logging.debug("Grouping Read Hits")
        reads = defaultdict(list)#readname: [hit hit hit]
        
        for line in alignments:
            if line.mapqv >= minMapq:
                reads[line.qname].append(line)
            else:
                logging.debug("Hit for %s has mapq %d - below threshold %d" % (line.qname, line.mapqv, minMapq))
        
        return reads    
    
    def getBestScore(self, reads):
        """
        Gets the read with the best score.
        parses all of the reads and returns the best hit of the group
        
        best hit is the alignment with the best score
        and if there is a tie, the tie break is based
        on alignment score
        
        and the to just take the first of the remaining alignments
        """
        if len(reads) == 1:
            return reads[0]
        #Get who we're using
        bestScore = []
        mostAccurate = []
        for read in reads:
            if read.flag & ALIGNFLAGS.bestScore:
                bestScore.append(read)
            if read.flag & ALIGNFLAGS.mostAccurate:
                mostAccurate.append(read)
        
        if len(bestScore) == 0:
            #Actually have to hunt it down, we're looking at some weiners
            bestScore, mostAccurate, lqpa = self.groupComparison(reads, False)
            #return None
        
        #Tie breaking
        anchor = None
        if len(bestScore) != 1:
            if len(mostAccurate) == 1:
                for i in bestScore:
                    if i == mostAccurate[0]:
                        anchor = mostAccurate[0]
        
        #Just arbiturarily take one
        if anchor is None:
            anchor = bestScore[0]
        
        return anchor

    def untangle(self, reads):
        """
        Given a group of subread's hits, see if we can eliminate the 
        spurious repeat matches
        
        Find LQPA 
        recursively call(give me all the concordant neighbors to the left and to the right)
        continue until there isn't exactly one neighbor
        """
        if len(reads) < 2:
            return reads
        
        anchor = self.getBestScore(reads)
        #I don't like this happening.. something is strange
        if anchor == None:
            logging.warning("Read %s doesn't have a best hit" % (reads[0].qname))
            return []
        
        newReads = self.layout(anchor, list(reads))
        
        newReads.sort(cmp=lambda x,y: x.qstart < y.qstart)
        return newReads
    
    """
    after I have my anchor, I want to find queryConcordant reads.
    I'm going to find all the reads that are queryConcordant outside of the anchor.
    If there is more than one read per side, I'll tie break those by 
     getting the one that is considered the best hit
        
    1 - Anchor solution is above.
    2 - layout 1
    3 - layout -1
    """
    def layout(self, anchor, allReads):
        """
        uses the anchor to find all unique concordant hits from the anchor 
        side = -1, check UpStream(5') of anchor
        side = 1, check DownStream (3') of anchor
        """
        foundNew = True
        #holds the min/max used qstart qend
        regionHolder = copy.deepcopy(anchor)
        ret = [anchor]
        while foundNew:
            foundNew = False
            reads = filter(lambda x: x not in ret, allReads)
            if len(reads) == 0:
                continue
            sides = { 1: [],
                     -1: []}
            
            for read in reads:
                side = self.isQueryConcordant(regionHolder, read)
                if side != 0:
                    foundNew = True
                    sides[side].append(read)
            
            if len(sides[1]) == 1:
                regionHolder.qstart = sides[1][0].qstart
                #region update (upstream)
                ret.append(sides[1][0])
            elif len(sides[1]) > 1:#Let all these guys duke it out
                bestHit = self.getBestScore(sides[1])
                if bestHit is None:
                    logging.warning("Here")
                    logging.warning("\n"+"\n".join([str(x) for x in sides[1]]))
                newGuys = self.layout(bestHit, sides[1])
                regionHolder.qstart = newGuys[0].qstart
                ret.extend(newGuys)
                #region update (upstream)
                
            if len(sides[-1]) == 1:
                bestHit = sides[-1]
                #region update (downstream)
                regionHolder.qend = sides[-1][0].qend
                ret.append(sides[-1][0])
            elif len(sides[-1]) > 1:
                bestHit = self.getBestScore(sides[-1])
                newGuys = self.layout(bestHit, sides[-1])
                regionHolder.qend = newGuys[-1].qend
                ret.extend(newGuys)
        
        ret.sort(cmp=lambda x,y: x.qstart < y.qstart)
        return ret

    def isDiscordant(self, alignment, tailAllowed=150):
        """
        If a read maps with long tails that don't map off the target's ends, call it discordant
        
        tailAllowed = max tail length allowed to not map to the reference
        
        Note - This doesn't check a reaason for the discordantcy (sp) -
             - Only checks for long tails.
             
        Deprecated again
        """
        orientation = self.extendsTarget(alignment)
        if orientation == SUPPORTFLAGS.span:
            #Not much to prove here.
            return False
            
        isDiscord = True
        if alignment.tstrand == "0":
            threeLen = alignment.qseqlength - alignment.qend
            fiveLen = alignment.qstart
            
            if orientation == SUPPORTFLAGS.left and fiveLen <= tailAllowed:
                isDiscord = False
            elif orientation == SUPPORTFLAGS.right and threeLen <= tailAllowed:
                isDiscord = False
            
        elif alignment.tstrand == "1":
            
            threeLen = alignment.qstart
            fiveLen = alignment.qseqlength - alignment.qend
            
            if orientation == SUPPORTFLAGS.left and fiveLen <= tailAllowed:
                isDiscord = False
            elif orientation == SUPPORTFLAGS.right and threeLen <= tailAllowed:
                isDiscord = False
            
        return isDiscord
    
    def idAdapters(self, reads):
        """
        Using the alignments for a particular read,
        Identify adapters and return a new list of reads that have been split up
        I'll rename the reads that have adapters
        
        ##split those subreads into two alignments
        Consider a subread that overlaps with itself as a missed adapter, split it.
            example - for a single read
            hit1:
                qstart 0: qend 100: tstart = 400: tend=500: strand = 0
            hit2:
                qstart = 150: qend = 250: tstart = 400: tend = 500: strand = 1
        this is highly indicitive of a missed adapter
        
        reads is a list of alignments that must be sorted in query concordant order 
            (meaning the hits are in order of how they should have been sequenced...
             return of AlignmentConnector.untangle makes this sort)
        
        """
        split = False
        for i in range(1, len(reads)):
            a = reads[i-1]
            b = reads[i]
            
            #Major chacteristics of missed adapter
            #Same target - different strand
            #Aligns in the same area
            if a.tname == b.tname and a.tstrand != b.tstrand \
               and (abs(a.tstart - b.tstart) <= 75 or abs(a.tend - b.tend) <= 75)\
               and (b.qstart - a.qend) <= 50:
                
                #Do some more tests to make sure that they overlap in the
                #Way we expect missed addapters to overlap
                ovl = self.supportsRegion(a, b.tname, b.tstart, b.tend)
                if a.tstrand == '0' and (ovl & SUPPORTFLAGS.right or \
                                         ovl & SUPPORTFLAGS.contain or\
                                         ovl & SUPPORTFLAGS.span):
                    split = True
                    logging.debug("Missed adapter in read! %s" % a.qname)
                    #We have a missed adapter
                    a.trim = (0, a.qend)
                    a.qseqlength = a.qend
                    #a.qname += "##%d#%d##" % a.trim

                    b.trim = (b.qstart, b.qseqlength)
                    shift = b.qend - b.qstart
                    b.qstart = 0
                    b.qend -= shift
                    b.qseqlength -= shift
                    #b.qname += "##%d#%d##" % b.trim

                elif a.tstrand == '1' and (ovl & SUPPORTFLAGS.left or \
                                           ovl & SUPPORTFLAGS.contain or\
                                           ovl & SUPPORTFLAGS.span):
                    split = True
                    logging.debug("Missed adapter in read! %s" % a.qname)
                    b.trim = (0, b.qend)
                    b.qseqlength = b.qend
                            
                    a.trim = (a.qstart, a.qseqlength)
                    shift = a.qend - a.qstart
                    a.qstart = 0
                    a.qend -= shift
                    a.qseqlength -= shift
            
        return split
        
class GapSupporter():
    """
    Holds a gapInfo file, as you feed self.support reads,
    it'll figure out the support and then keep track of it.
    
    you can add an existing alignCon to the GapSupporter if you've
    made one elsewhere and want to save space. But another will
    be automatically made if you don't

    Also, this does the bookkeeping to track what gaps are supported
    by what reads and how.
    
    Can run summary statistics on how many gaps are supported and such
    """
    def __init__(self,  gapInfo, alignCon=None):
        self.gapInfo = gapInfo
        
        try:
            self.gapIndex = self.gapInfo.getSortedGaps()
        except Exception:#For any reason
            self.gapIndex = None
        
        self.alignCon = AlignmentConnector() if alignCon is None else alignCon
        self.gapGraph= GapGraph()
        
        #{ readName: [(alignment, [nodeName, nodeName]), ] }
        #Same as that weird name manipulation as before.
        #see if it is a span
        #self.readSupport = {}
        
    def classifyRead(self, alignmentGroup, capturedOnly=False):
        """
        For each alignment in a read, add information about read supporting
        gaps
        alignmentGroup must be in order

        spanOnly == True:
            only add span support
        capturedOnly == False
            ignore scaffold gaps
        """
        self.capturedGapSupport(alignmentGroup)
        if not capturedOnly:
            self.scaffoldGapSupport(alignmentGroup)
            #self.consolidate spport
     
    def capturedGapSupport(self, alignmentGroup):
        """
        returns a list of nodes in the graph the read supports
        """
        # gapName : SUPPORTFLAG
        ret = defaultdict(int)
        #Finding gaps to support and classifying
        for alignment in alignmentGroup:
            candidates = []
            scaffold = alignment.tname.split('|')[-1]
            if self.gapIndex is not None:
                try:
                    #Find range of gaps we can potentially support 
                    #I play it safe and get up to 2 extra gaps we could support
                    startIndex = max(0, bisect_left(self.gapIndex[scaffold], alignment.tstart) - 1)
                    endIndex = bisect_right(self.gapIndex[scaffold], alignment.tend) + 1
                    candidates = self.gapIndex[scaffold][startIndex:endIndex]
                except KeyError:
                    pass#No gaps
            else:#never happens?
                gaps = filter(lambda x: x.startswith(scaffold), self.gapInfo.keys())
                for key in gaps:
                    candidates.append(self.gapInfo[key])
            
            for gap in candidates:
                logging.debug("gapSup")
                supType = self.alignCon.supportsRegion(alignment,  \
                                            rName=gap.scaffold, \
                                            rStart=gap.start,\
                                            rEnd = gap.end)
                if supType == SUPPORTFLAGS.none:
                    continue
                ret[gap.name] += supType
        #Adding support to graph
        readName = alignmentGroup[0].qname
        for gapName in ret:
            #New way
            gap = self.gapInfo[gapName]
            supType = ret[gapName]
            lftNode = gap.leftContig + "e3"
            rhtNode = gap.rightContig + "e5"
            
            #Extends gap to left or rightContig to right
            if supType == SUPPORTFLAGS.left:
                self.gapGraph.add_extend(lftNode, readName)
            elif supType == SUPPORTFLAGS.right:
                self.gapGraph.add_extend(rhtNode, readName)
            elif supType == SUPPORTFLAGS.span:
                self.gapGraph.add_evidence(lftNode, rhtNode, readName)
        
        return ret

    def scaffoldGapSupport(self, alignmentGroup):
        """ 
        Checks to see if and how an alignment supports between scaffolding gaps
        
        alignmentGroup must be sorted by query portion used
        sourceA is alignmentGroup[0] support
        sourceB is alignmentGroup[1] support
        add a link between A and B
        sourceC is alignmentGroup[2] support
        """
        readName = alignmentGroup[0].qname
        logging.debug("looking at: " + readName + " for scaffold extension/unification")
        
        alignmentGroup.sort(cmp=lambda x,y: x.qstart - y.qstart)
        anchor = self.alignCon.getBestScore(alignmentGroup)
        
        flags = []
        
        logging.debug("Building flags table")
        logging.debug("%d - %s" % (len(alignmentGroup), " ".join(map(str, alignmentGroup))))
        for alignment in alignmentGroup:
            #[(Flag, lftNode, rhtNode),..]
            base = alignment.tname.split('|')[-1]
            flag = self.alignCon.extendsTarget(alignment)
            flags.append((flag,                 #SupFlag
                          base+"e5",            #LftNode
                          base+"e3",            #RhtNode
                          alignment.tstrand))   #Strand   
            logging.debug(str(base) + " " + str(flag) + " " + str(alignment))
        #Solo..
        if len(alignmentGroup) == 1:
            logging.debug("Solo alignment comparison")
            flag1, lftNode1, rhtNode1, strand1 = flags[0]
            if flag1 == SUPPORTFLAGS.span:
                #This can't happen...
                self.gapGraph.add_extend(lftNode1, readName)
                self.gapGraph.add_extend(rhtNode1, readName)
            elif flag1 == SUPPORTFLAGS.right:
                self.gapGraph.add_extend(rhtNode1, readName)
            elif flag1 == SUPPORTFLAGS.left:
                self.gapGraph.add_extend(lftNode1, readName)
            return
        
        index = alignmentGroup.index(anchor)
        
        #go upstream of anchor
        pairs = []
        for i in range(index, 0, -1):
            #pairs.append((alignmentGroup[i], flags[i], alignmentGroup[i-1], flags[i-1]))
            pairs.append((alignmentGroup[i-1], flags[i-1], alignmentGroup[i], flags[i]))
            logging.debug("made pair upstream")
            logging.debug(" ".join(map(str,[alignmentGroup[i], flags[i], alignmentGroup[i-1], flags[i-1]])))
        self.__scaffRangeDo__(pairs)
        #Go downstream of anchor
        pairs = []
        for i in range(index, len(alignmentGroup) - 1, 1):
            pairs.append((alignmentGroup[i], flags[i], alignmentGroup[i+1], flags[i+1]))
            logging.debug("made pair downstream")
            logging.debug(" ".join(map(str,[alignmentGroup[i], flags[i], alignmentGroup[i+1], flags[i+1]])))
        self.__scaffRangeDo__(pairs)
        
    
    def __scaffRangeDo__(self, pairs):
        """
        Go through the pairs and see if there is a link between the them
        """
        if len(pairs) == 0:
            return
        readName = pairs[0][0].qname
        i = 1
        for align1, flags1, align2, flags2 in pairs:
            logging.debug(flags1)
            logging.debug(flags2)
            flag1, lftNode1, rhtNode1, strand1 = flags1
            flag2, lftNode2, rhtNode2, strand2 = flags2
            #Super Conservative, I'm not going to let
            #ends of multi-mappers extend scaffolds
            if strand1 == '0':
                if flag1 == SUPPORTFLAGS.right or flag1 == SUPPORTFLAGS.span:#extend correctly
                    if strand2 == '0':
                        if flag2 == SUPPORTFLAGS.left or flag2 == SUPPORTFLAGS.span:
                            self.gapGraph.add_evidence(rhtNode1, lftNode2, readName)
                    elif strand2 == '1':
                        if flag2 == SUPPORTFLAGS.right or flag2 == SUPPORTFLAGS.span:
                            self.gapGraph.add_evidence(rhtNode1, rhtNode2, readName)
                    else:
                        return; #We've broken from the anchor
                else:
                    return; #we've broken the anchor chain
            
            elif strand1 == '1':
                #I never strand corrected? left means right, right means left
                if flag1 == SUPPORTFLAGS.left or flag1 == SUPPORTFLAGS.span:#extend correctly
                    if strand2 == '0':
                        if flag2 == SUPPORTFLAGS.left or flag2 == SUPPORTFLAGS.span:
                            self.gapGraph.add_evidence(lftNode1, lftNode2, readName)
                    elif strand2 == '1':
                        if flag2 == SUPPORTFLAGS.right or flag2 == SUPPORTFLAGS.span:
                            self.gapGraph.add_evidence(lftNode1, rhtNode2, readName)
                    else:
                        return; #We've broken from the anchor           
                else:
                    return; #we've broken the anchor chain

class Support():
    """
    Worker for this script
    Takes Reads, Connects the Alignments, Classifies the Gap Support
    """
    
    def __init__(self):
        self.parseArgs()
        setupLogging(self.options.debug)
    
    def parseArgs(self):
        parser = OptionParser(USAGE)
        
        parser.add_option("-m", "--minMapq", default=200, type=int, \
                          help=("Minimum MapQ of a read to be considered "
                                "for support (200)"))
        parser.add_option("--spanOnly", action="store_true", \
                          help=("Only allow support by reads that span an"
                                " entire gap. i.e. no contig extension."))
        parser.add_option("--capturedOnly", action="store_true", \
                  help=("Only find support for captured gaps. "\
                        " i.e. no between-scaffold gap-filling"))
        parser.add_option("--debug", action="store_true", \
                          help="Increases verbosity of logging" )

        self.options, args = parser.parse_args()
        
        if len(args) != 3:
            parser.error("Error! Incorrect number of arguments")
        
        if not os.path.isfile(args[0]):
            parser.error("Error! Alignment File Does Not Exist")
        self.alignmentFileName = args[0]
        
        if not os.path.isfile(args[1]):
            parser.error("Error! Gap Info File Does Not Exist") 
        self.gapFileName = args[1]
        
        if os.path.isfile(args[2]):
            sys.stderr.write("[WARNING] Output File Being Overwritten!")
        self.outputFileName = args[2]
        
        self.gapInfo = GapInfoFile(self.gapFileName)
        if os.path.splitext(self.alignmentFileName)[1] == '.m4':
            self.alignments = M4File(self.alignmentFileName)
        elif os.path.splitext(self.alignmentFileName)[1] == '.m5':
            self.alignments = M5File(self.alignmentFileName)
        else:
            parser.error("Error! Alignment File Extension (%s) not recognized." \
                         % os.path.splitext(self.alignmentFileName)[1])
    
    def run(self):
        """
        Given an alignment file, put it through the paces that will figure out what
        gaps each read supports
        """
        logging.info("Building Helper Objects")
        connector = AlignmentConnector()
        supporter = GapSupporter(self.gapInfo, alignCon = connector)
        logging.info("Connecting Alignments")
        alignments = connector.parseAlignments(self.alignments, self.options.minMapq)
        
        logging.info("Classifying Alignments' Support")
        for readGroup in alignments:
            supporter.classifyRead(readGroup, self.options.capturedOnly)
        
        logging.info("Saving Support Graph")
        supporter.gapGraph.saveGraph(self.outputFileName, self.options.spanOnly)
        logging.info("Finished")

if __name__ == '__main__':
    main = Support()
    main.run()
    pass