File: clusterBed.cpp

package info (click to toggle)
bedtools 2.27.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 54,804 kB
  • sloc: cpp: 38,072; sh: 7,307; makefile: 2,241; python: 163
file content (120 lines) | stat: -rw-r--r-- 3,592 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*****************************************************************************
  clusterBed.cpp

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "lineFileUtilities.h"
#include "clusterBed.h"

// = Constructor =
BedCluster::BedCluster(string &bedFile, 
                   int  maxDistance, 
                   bool forceStrand) 
    :
    _bedFile(bedFile),
    _forceStrand(forceStrand),
    _maxDistance(maxDistance)
{
    _bed = new BedFile(bedFile);
    if (_forceStrand == false)
        ClusterBed();
    else
        ClusterBedStranded();
}


// = Destructor =
BedCluster::~BedCluster(void) 
{}


// = Cluster overlapping or nearby BED entries =
void BedCluster::ClusterBed() {

    uint32_t cluster_id = 0;
    BED prev, curr;
    int end   = -1;
    
    _bed->Open();
    while (_bed->GetNextBed(curr, true)) { // true = force sorted intervals
        if (_bed->_status != BED_VALID)
            continue;            

        // new cluster, no overlap
        if ( (((int) curr.start - end) > _maxDistance) || 
             (curr.chrom != prev.chrom)
           ) 
        {
            cluster_id++;
            end   = curr.end;
        }
        else {
            if ((int) curr.end > end)
                end   = curr.end;
        }
        prev = curr;
        _bed->reportBedTab(curr);
        printf("%d\n", cluster_id);
    }
}


// = Cluster overlapping BED entries, accounting for strandedness =
void BedCluster::ClusterBedStranded() {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bed->loadBedFileIntoMapNoBin();
    
    uint32_t cluster_id = 0;

    // loop through each chromosome and merge their BED entries
    masterBedMapNoBin::const_iterator m    = _bed->bedMapNoBin.begin();
    masterBedMapNoBin::const_iterator mEnd = _bed->bedMapNoBin.end();
    for (; m != mEnd; ++m) {
        
        // bedList is already sorted by start position.
        string chrom        = m->first;
        vector<BED> bedList = m->second;
    
        // make a list of the two strands to merge separately.
        vector<string> strands(2);
        strands[0] = "+";
        strands[1] = "-";
        // do two passes, one for each strand.
        for (unsigned int s = 0; s < strands.size(); s++) {
            // cluster overlapping features for this chromosome.
            int end   = -1;
            BED prev;    
            vector<BED>::const_iterator bedItr = bedList.begin();
            vector<BED>::const_iterator bedEnd = bedList.end();
            for (; bedItr != bedEnd; ++bedItr) {
                // if forcing strandedness, move on if the hit
                // is not on the current strand.
                if (bedItr->strand != strands[s])
                    continue;
                
                // new cluster, no overlap
                if ( (((int) bedItr->start - end) > _maxDistance) || (end < 0)) 
                {
                    cluster_id++;
                    end   = bedItr->end;
                }
                // same cluster, overlaps
                else {
                    if ((int) bedItr->end > end) 
                        end = bedItr->end;
                }
                prev = *bedItr;
                _bed->reportBedTab(prev);
                printf("%d\n", cluster_id);
            }
        }
    }
}