File: CoverageDistribution.cpp

package info (click to toggle)
ray 2.3.1-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,008 kB
  • sloc: cpp: 49,973; sh: 339; makefile: 281; python: 168
file content (124 lines) | stat: -rw-r--r-- 3,243 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/*
 	Ray
    Copyright (C)  2010, 2011, 2012 Sébastien Boisvert

	http://DeNovoAssembler.SourceForge.Net/

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, version 3 of the License.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You have received a copy of the GNU General Public License
    along with this program (gpl-3.0.txt).  
	see <http://www.gnu.org/licenses/>

*/

#include "CoverageDistribution.h"

#include <iostream>
#include <fstream>
#include <map>
#ifdef CONFIG_ASSERT
#include <assert.h>
#endif
using namespace std;


CoverageDistribution::CoverageDistribution(map<CoverageDepth,LargeCount>*distributionOfCoverage,string*file){

	if(file!=NULL){
		ofstream f;
		f.open(file->c_str());

		f<<"# KmerCoverage	Frequency"<<endl;
		f<<"# Any frequency is a even number because of odd k-mer length"<<endl;

		for(map<CoverageDepth,LargeCount>::iterator i=distributionOfCoverage->begin();i!=distributionOfCoverage->end();i++){
			f<<""<<i->first<<" "<<i->second<<endl;
		}
		f.close();
	}
	
	vector<CoverageDepth> x;
	vector<LargeCount> y;
	for(map<CoverageDepth,LargeCount>::iterator i=distributionOfCoverage->begin();i!=distributionOfCoverage->end();i++){
		x.push_back(i->first);
		y.push_back(i->second);
	}

	int windowSize=10;
	CoverageDepth minimumX=1;
	LargeCount minimumY=2*4096;
	LargeCount minimumY2=55000;
	CoverageDepth maximumX=65535-1;
	CoverageDepth safeThreshold=256;

	/** get the votes to find the peak */
	map<int,int> votes;
	for(int i=0;i<(int)x.size();i++){
		int largestPosition=i;
		for(int j=0;j<windowSize;j++){
			int position=i+j;
			if(position >= (int)x.size())
				break;
			if(y.at(position) > y.at(largestPosition))
				largestPosition=position;
		}
	
		if(x[largestPosition]>maximumX)
			continue;

		if(x[largestPosition]<minimumX)
			continue;
	
		if(x[largestPosition] >= safeThreshold && y[largestPosition] < minimumY2)
			continue;
		
		if(y.at(largestPosition)>minimumY)
			votes[largestPosition]++;
	}

	/** check votes */
	int largestPosition=votes.begin()->first;
	for(map<int,int>::iterator i=votes.begin();i!=votes.end();i++){
		if((i->second > votes[largestPosition] || y[i->first] > y[largestPosition]))
			largestPosition=i->first;
		//cout<<"x: "<<x[i->first]<<" votes: "<<i->second<<" y: "<<y[i->first]<<endl;
	}
	
	int minimumPosition=largestPosition;
	int i=largestPosition;
	while(i >= 0){
		if(y[i] <= y[minimumPosition])
			minimumPosition=i;
		i--;
	}

	m_minimumCoverage=x[minimumPosition];
	m_peakCoverage=x[largestPosition];

	m_repeatCoverage=2*m_peakCoverage;
	int diff=m_peakCoverage-m_minimumCoverage;
	int candidate=m_peakCoverage+diff;

	if(candidate<m_repeatCoverage)
		m_repeatCoverage=candidate;
}

int CoverageDistribution::getPeakCoverage(){
	return m_peakCoverage;
}

int CoverageDistribution::getMinimumCoverage(){
	return m_minimumCoverage;
}

int CoverageDistribution::getRepeatCoverage(){
	return m_repeatCoverage;
}