File: sampleFile.cpp

package info (click to toggle)
bedtools 2.27.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 54,804 kB
  • sloc: cpp: 38,072; sh: 7,307; makefile: 2,241; python: 163
file content (114 lines) | stat: -rw-r--r-- 2,789 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include "sampleFile.h"

static bool SampleRecordLtFn(const Record *rec1, const Record *rec2) {
	return (*rec1 < *rec2);
}

// Constructs the sampling tool on top of ToolBase.
//
// The requested sample count is read from the context; a count of zero is
// replaced by DEFAULT_NUM_SAMPLES. The input file is not resolved here --
// that happens in init().
SampleFile::SampleFile(ContextSample *context)
: ToolBase(context),
_inputFile(NULL),
_numSamples(context->getNumOutputRecords()),
_numCurrSamples(0),
_currRecordNum(0)
{
	if (_numSamples == 0) {
		// Nothing was requested, so fall back to the default count.
		_numSamples = DEFAULT_NUM_SAMPLES;
	}
}


// Intentionally empty: this destructor performs no cleanup of its own.
SampleFile::~SampleFile() {
}

 // Prepares the tool for a run: resolves the input file, sizes the sample
 // buffer, and makes sure a random seed has been chosen. Always returns true.
 bool SampleFile::init()
 {
	// Sampling works on a single input, which the context exposes at index 0.
	_inputFile = _context->getFile(0);

	// One slot per requested sample; slots stay NULL until a record is stored.
	_samples.resize(_numSamples, NULL);

	// Per the original author's note, the Context object owns the seed
	// (user supplied or randomly generated) and performs the srand call, so
	// no seeding is done here. When the user gave no seed we only ask the
	// context for an unspecified one.
	const bool seedIsFixed = _context->hasConstantSeed();
	if (!seedIsFixed) {
		_context->getUnspecifiedSeed();
	}
	return true;
 }


 // Reads forward until one record has been consumed from the input file.
 //
 // The record is counted, offered to keepRecord(), and handed back to the
 // input file for deletion if it was not kept. The `hits` argument is not
 // used by this tool.
 //
 // Returns true when a record was consumed, false once the input is at EOF.
 bool SampleFile::findNext(RecordKeyVector &hits) {
	for (;;) {
		if (_inputFile->eof()) {
			return false;
		}

		Record *nextRec = _inputFile->getNextRecord();
		if (nextRec == NULL) {
			// Nothing was produced this time around; try again.
			continue;
		}

		++_currRecordNum;
		const bool wasKept = keepRecord(nextRec);
		if (!wasKept) {
			_inputFile->deleteRecord(nextRec);
		}
		return true;
	}
}

// Writes the sampled records through the output manager once all input has
// been read.
//
// Exits the process with status 1 (after printing an error to cerr) when the
// input held fewer records than the number of samples requested.
//
// outputMgr: destination that each kept record is printed to.
void  SampleFile::giveFinalReport(RecordOutputMgr *outputMgr) {
	if (_currRecordNum < _numSamples) {
		//die with error;
		cerr << "\n***** ERROR: Input file has fewer records than the requested number of output records. *****" << endl << endl;
		exit(1);
	}

	// keepRecord() fills _samples from the front and skips records rejected
	// by strandComplies(), so fewer than _numSamples slots may hold a record
	// even though enough records were read. The remaining slots are still
	// NULL and must be neither compared nor printed.
	const size_t numFilled = (_numCurrSamples < _numSamples) ? _numCurrSamples : _numSamples;

	//If the output type is BAM, must sort the output records.
	if (_context->getOutputFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
		sort(_samples.begin(), _samples.begin() + numFilled, SampleRecordLtFn);
	}

	// Now output all the kept records.
	for (size_t i = 0; i < numFilled; i++) {
		outputMgr->printRecord(_samples[i]);
	}
}

// Offers one record to the sample buffer (reservoir sampling).
//
// Records that fail strandComplies() are rejected outright. Until the buffer
// holds _numSamples records every accepted record is stored; after that a
// record replaces a randomly chosen stored one with probability
// _numSamples / _currRecordNum.
//
// Returns true when `record` was stored in _samples (the caller must not
// delete it), false when it was not stored (findNext() then deletes it).
//
// NOTE(review): _currRecordNum is incremented by findNext() for every record
// read, including ones rejected by strandComplies(), so with a strand filter
// the replacement probability is based on all records rather than only the
// eligible ones -- confirm whether that is intended.
bool SampleFile::keepRecord(Record *record)
{
	if (!strandComplies(record)) {
		return false;
	}
	if (_numCurrSamples < _numSamples) {
		_samples[_numCurrSamples] = record;
		_numCurrSamples++;
		return true;
	}

	// We need a random number in the range [0, _currRecordNum).
	// Two consecutive calls to rand() are combined because a single call
	// yields at most RAND_MAX (commonly 2^31 - 1), whereas the number of
	// input records could be far larger.
	// The calls are made in separate statements so their order is fixed, and
	// the pieces are widened to an unsigned 64-bit type before shifting so
	// the shift cannot overflow on platforms where long is only 32 bits.
	const unsigned long long highBits = static_cast<unsigned long long>(rand());
	const unsigned long long lowBits = static_cast<unsigned long long>(rand());
	const size_t idx = static_cast<size_t>(((highBits << 31) | lowBits) % _currRecordNum);

	if (idx < _numSamples) {
		//replace old record at idx with this new one.
		_inputFile->deleteRecord(_samples[idx]);
		_samples[idx] = record;
		return true;
	}
	return false;
}

// Decides whether a record passes the strand restriction from the context.
//
// With no same-strand restriction every record passes. Otherwise the record
// passes only if forward-only is set and its strand is FORWARD, or
// reverse-only is set and its strand is REVERSE.
bool SampleFile::strandComplies(const Record * record) {
	// No strand restriction requested: nothing to check.
	if (!upCast(_context)->getSameStrand()) {
		return true;
	}

	const bool forwardMatch =
		upCast(_context)->getForwardOnly() && record->getStrandVal() == Record::FORWARD;
	if (forwardMatch) {
		return true;
	}

	return upCast(_context)->getReverseOnly() && record->getStrandVal() == Record::REVERSE;
}