File: BinTree.cpp

package info (click to toggle)
bedtools 2.27.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 54,804 kB
  • sloc: cpp: 38,072; sh: 7,307; makefile: 2,241; python: 163
file content (144 lines) | stat: -rw-r--r-- 4,348 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#include "BinTree.h"
#include "FileRecordMgr.h"


// Stores the context pointer and builds the per-level bin offset table.
//
// _binOffsetsExtended has NUM_BIN_LEVELS entries. Entry 0 is zero, and
// entry i is entry i-1 plus 2^((NUM_BIN_LEVELS - i - 1) * _binNextShift).
// getBin() and getHits() add entry i to a level-local bin index to obtain
// the bin number used as the key in the per-chromosome bin map.
//
// The per-level width is taken from _binNextShift, the same constant
// getBin() and getHits() use when moving from one level to the next, so the
// table cannot drift out of step with the lookups. The previous code
// hard-coded 3 here; the result is unchanged provided _binNextShift is 3
// (its definition is not in this file -- confirm against BinTree.h).
//
// context is stored as-is in _context; it is not otherwise used here.
BinTree::BinTree(ContextIntersect *context)
:  _context(context),
  // The trailing () value-initializes the array, so every entry starts at 0.
  _binOffsetsExtended(new binNumType[NUM_BIN_LEVELS]())
 {
	// Start at level 1: entry 0 is already zero, which is what we want.
	for (binNumType level = 1; level < NUM_BIN_LEVELS; level++) {
		// Do the shift in binNumType rather than plain int.
		const binNumType increment =
			static_cast<binNumType>(1) << ((NUM_BIN_LEVELS - level - 1) * _binNextShift);
		_binOffsetsExtended[level] = _binOffsetsExtended[level - 1] + increment;
	}
}

// Frees the bin offset table allocated in the constructor.
// Nothing else is released here; in particular the Record pointers held in
// _mainMap are not deleted by this destructor.
BinTree::~BinTree()
{
	delete[] _binOffsetsExtended;
}

void BinTree::loadDB()
{
	for (int i=0; i < _context->getNumDatabaseFiles(); i++) {
		FileRecordMgr *databaseFile = _context->getDatabaseFile(i);

		Record *record = NULL;
		while (!databaseFile->eof()) {
			record = databaseFile->getNextRecord();
			//In addition to NULL records, we also don't want to add unmapped reads.
			if (record == NULL || record->isUnmapped()) {
				continue;
			}

			_context->testNameConventions(record);

			if (!addRecordToTree(record)) {
				fprintf(stderr, "ERROR: Unable to add record to tree.\n");
				databaseFile->close();
				exit(1);
			}
		}
	}
}

// Appends to hitSet every record stored in the tree that `record`
// intersects, using the strand and overlap-fraction settings read from
// _context.
//
// Nothing is appended when the query is unmapped or when its chromosome has
// no entry in _mainMap. hitSet is not cleared first. After the search,
// hitSet.sortVector() is called if _context->getSortOutput() is true.
void BinTree::getHits(Record *record, RecordKeyVector &hitSet)
{
	if (record->isUnmapped()) {
		return;
	}

	const string &chromName = record->getChrName();
	mainMapType::iterator chromIter = _mainMap.find(chromName);
	if (chromIter == _mainMap.end()) {
		// Nothing was stored for this chromosome, so there can be no hits.
		return;
	}
	allBinsType &chromBins = chromIter->second;

	const binNumType queryStart = record->getStartPos();
	const binNumType queryEnd = record->getEndPos();

	// Level-local bin indices of the query's first and last position at the
	// finest level. The end coordinate is decremented before shifting.
	binNumType firstBin = (queryStart >> _binFirstShift);
	binNumType lastBin = ((queryEnd - 1) >> _binFirstShift);

	// Walk every bin level, finest first. At each level, visit each bin the
	// query touches and test all records stored in it against the query.
	for (binNumType level = 0; level < NUM_BIN_LEVELS; ++level) {
		const binNumType levelOffset = _binOffsetsExtended[level];

		for (binNumType binNum = (firstBin + levelOffset); binNum <= (lastBin + levelOffset); ++binNum) {
			allBinsType::iterator binPos = chromBins.find(binNum);
			if (binPos != chromBins.end()) {
				// Only bins that actually hold records need scanning.
				binType &candidates = binPos->second;

				for (binType::iterator candIter = candidates.begin(); candIter != candidates.end(); ++candIter) {
					Record *dbRec = *candIter;
					const bool isHit = record->intersects(dbRec,
					                                      _context->getSameStrand(),
					                                      _context->getDiffStrand(),
					                                      _context->getOverlapFractionA(),
					                                      _context->getOverlapFractionB(),
					                                      _context->getReciprocalFraction(),
					                                      _context->getEitherFraction());
					if (isHit) {
						hitSet.push_back(dbRec);
					}
				}
			}
		}

		// Move to the next level: its bin indices are the current ones
		// shifted right by _binNextShift.
		firstBin = firstBin >> _binNextShift;
		lastBin = lastBin >> _binNextShift;
	}

	if (_context->getSortOutput()) {
		hitSet.sortVector();
	}
}

// Files `record` under its chromosome name and the bin number that getBin()
// computes from its start and end positions.
//
// Returns true once the record pointer has been appended to that bin.
// Returns false, after printing an error to stderr, when the bin number is
// outside [0, NUM_BINS); nothing is stored in that case.
//
// NOTE(review): binNumType is declared outside this file. If it is unsigned,
// the `>= 0` half of the range test is always true; if it is signed or wider
// than unsigned int, the "%u" specifier below does not match the argument.
// Confirm against BinTree.h.
bool BinTree::addRecordToTree(Record *record)
{
	const string &chromName = record->getChrName();
	const binNumType recStart = static_cast<binNumType>(record->getStartPos());
	const binNumType recEnd = static_cast<binNumType>(record->getEndPos());
	const binNumType binNum = getBin(recStart, recEnd);

	const bool binIsValid = (binNum >= 0 && binNum < NUM_BINS);
	if (binIsValid) {
		// operator[] creates the chromosome and bin entries on first use.
		allBinsType &chromBins = _mainMap[chromName];
		chromBins[binNum].push_back(record);
		return true;
	}

	fprintf(stderr, "ERROR: Received illegal bin number %u from getBin call.\n", binNum);
	return false;
}


// Convenience overload: returns the bin number for the span given by the
// record's getStartPos() and getEndPos(), each converted to binNumType and
// passed to getBin(start, end).
BinTree::binNumType BinTree::getBin(const Record *record) const {
	const binNumType recStart = static_cast<binNumType>(record->getStartPos());
	const binNumType recEnd = static_cast<binNumType>(record->getEndPos());
	return getBin(recStart, recEnd);
}

// Returns the number of the bin at the first level (searching from level 0
// upward) where `start` and `end - 1` share the same level-local index.
//
// `end` is decremented before any shifting. Both values are shifted right
// by _binFirstShift to get level-0 indices, then by _binNextShift for each
// further level. The result is the matching level's entry in
// _binOffsetsExtended plus the shared index.
//
// If no level matches, -1 converted to binNumType is returned.
// NOTE(review): if binNumType is unsigned this is its maximum value rather
// than a negative number -- confirm against BinTree.h.
BinTree::binNumType BinTree::getBin(binNumType start, binNumType end) const {
    --end;
    start = start >> _binFirstShift;
    end   = end >> _binFirstShift;

    binNumType level = 0;
    while (level < NUM_BIN_LEVELS) {
        if (start == end) {
            // Both ends share a bin at this level.
            return _binOffsetsExtended[level] + start;
        }
        // Not yet in a common bin; try the next level.
        start = start >> _binNextShift;
        end   = end >> _binNextShift;
        ++level;
    }

    // No level contained both ends.
    return -1;
}