1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
/*
Ray
Copyright (C) 2010, 2011, 2012 Sébastien Boisvert
http://DeNovoAssembler.SourceForge.Net/
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You have received a copy of the GNU General Public License
along with this program (gpl-3.0.txt).
see <http://www.gnu.org/licenses/>
*/
#include "CoverageDistribution.h"
#include <iostream>
#include <fstream>
#include <map>
#ifdef CONFIG_ASSERT
#include <assert.h>
#endif
using namespace std;
CoverageDistribution::CoverageDistribution(map<CoverageDepth,LargeCount>*distributionOfCoverage,string*file){
if(file!=NULL){
ofstream f;
f.open(file->c_str());
f<<"# KmerCoverage Frequency"<<endl;
f<<"# Any frequency is a even number because of odd k-mer length"<<endl;
for(map<CoverageDepth,LargeCount>::iterator i=distributionOfCoverage->begin();i!=distributionOfCoverage->end();i++){
f<<""<<i->first<<" "<<i->second<<endl;
}
f.close();
}
vector<CoverageDepth> x;
vector<LargeCount> y;
for(map<CoverageDepth,LargeCount>::iterator i=distributionOfCoverage->begin();i!=distributionOfCoverage->end();i++){
x.push_back(i->first);
y.push_back(i->second);
}
int windowSize=10;
CoverageDepth minimumX=1;
LargeCount minimumY=2*4096;
LargeCount minimumY2=55000;
CoverageDepth maximumX=65535-1;
CoverageDepth safeThreshold=256;
/** get the votes to find the peak */
map<int,int> votes;
for(int i=0;i<(int)x.size();i++){
int largestPosition=i;
for(int j=0;j<windowSize;j++){
int position=i+j;
if(position >= (int)x.size())
break;
if(y.at(position) > y.at(largestPosition))
largestPosition=position;
}
if(x[largestPosition]>maximumX)
continue;
if(x[largestPosition]<minimumX)
continue;
if(x[largestPosition] >= safeThreshold && y[largestPosition] < minimumY2)
continue;
if(y.at(largestPosition)>minimumY)
votes[largestPosition]++;
}
/** check votes */
int largestPosition=votes.begin()->first;
for(map<int,int>::iterator i=votes.begin();i!=votes.end();i++){
if((i->second > votes[largestPosition] || y[i->first] > y[largestPosition]))
largestPosition=i->first;
//cout<<"x: "<<x[i->first]<<" votes: "<<i->second<<" y: "<<y[i->first]<<endl;
}
int minimumPosition=largestPosition;
int i=largestPosition;
while(i >= 0){
if(y[i] <= y[minimumPosition])
minimumPosition=i;
i--;
}
m_minimumCoverage=x[minimumPosition];
m_peakCoverage=x[largestPosition];
m_repeatCoverage=2*m_peakCoverage;
int diff=m_peakCoverage-m_minimumCoverage;
int candidate=m_peakCoverage+diff;
if(candidate<m_repeatCoverage)
m_repeatCoverage=candidate;
}
int CoverageDistribution::getPeakCoverage(){
return m_peakCoverage;
}
int CoverageDistribution::getMinimumCoverage(){
return m_minimumCoverage;
}
int CoverageDistribution::getRepeatCoverage(){
return m_repeatCoverage;
}
|