File: QualityCaller.cpp

package info (click to toggle)
ray 2.3.1-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,008 kB
  • sloc: cpp: 49,973; sh: 339; makefile: 281; python: 168
file content (109 lines) | stat: -rw-r--r-- 2,586 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
 	Ray
    Copyright (C) 2010, 2011, 2012 Sébastien Boisvert

	http://DeNovoAssembler.SourceForge.Net/

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, version 3 of the License.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You have received a copy of the GNU General Public License
    along with this program (gpl-3.0.txt).  
	see <http://www.gnu.org/licenses/>
*/

#include "QualityCaller.h"

#include <RayPlatform/core/statistics.h>

#include <iostream>
#include <assert.h>
#include <math.h>
using namespace std;

/**
 * Pearson product-moment correlation coefficient of two integer series.
 *
 * \param x first series
 * \param y second series; it is expected to hold as many elements as x
 *          (this is only verified when CONFIG_ASSERT is defined)
 * \return the correlation coefficient, or 0 when x is empty or when the sum
 *         of squared deviations of either series is 0 (this avoids a
 *         division by zero)
 *
 * \see http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient
 */
double QualityCaller::computeCorrelation(vector<int>*x,vector<int>*y){
	/* nothing to correlate */
	if(x->empty())
		return 0;

	#ifdef CONFIG_ASSERT
	assert(x->size()==y->size());
	#endif

	const double meanX=getAverage(x);
	const double meanY=getAverage(y);

	/* running sums: cross products and squared deviations from the means */
	double crossProducts=0;
	double squaresX=0;
	double squaresY=0;

	const int count=(int)x->size();
	int position=0;

	while(position<count){
		/* at() keeps the accesses bounds-checked */
		const double deltaX=x->at(position)-meanX;
		const double deltaY=y->at(position)-meanY;

		crossProducts+=deltaX*deltaY;
		squaresX+=deltaX*deltaX;
		squaresY+=deltaY*deltaY;

		position++;
	}

	/* a series without any spread would give a null denominator */
	const bool degenerate=(squaresX==0 || squaresY==0);

	if(degenerate)
		return 0;

	const double denominator=sqrt(squaresX)*sqrt(squaresY);

	return crossProducts/denominator;
}

/**
 * Scores how similar two distributions are.
 *
 * For every key of array1 that is also present in array2, the two associated
 * values are paired. The score is the absolute value of the correlation
 * coefficient returned by computeCorrelation() for these pairs.
 *
 * \param array1 first distribution; its keys drive the iteration
 * \param array2 second distribution; keys of array1 that are absent from it
 *               are skipped
 * \return the absolute correlation coefficient
 */
double QualityCaller::computeQuality(map<CoverageDepth,LargeCount>*array1,map<CoverageDepth,LargeCount>*array2){
	vector<int> y1Values;
	vector<int> y2Values;

	/* at most one pair is produced per entry of array1 */
	y1Values.reserve(array1->size());
	y2Values.reserve(array1->size());

	#ifdef CONFIG_CALLER_VERBOSE
	/*
	 * NOTE(review): bestX is not declared in this function -- confirm that
	 * it is visible from here, otherwise this line does not build when
	 * CONFIG_CALLER_VERBOSE is defined.
	 */
	cout<<"peakY for array1 "<<bestX<<endl;
	#endif

	for(map<CoverageDepth,LargeCount>::iterator i=array1->begin();i!=array1->end();++i){
		/*
		 * Keys and values are converted to int because
		 * computeCorrelation() works on vector<int>.
		 * NOTE(review): presumably the values fit in an int -- confirm.
		 */
		int x1=i->first;
		int y1=i->second;

		/*
		 * array2 should always have this point;
		 * a single find() replaces the former count() + operator[] pair
		 * so that the key is searched only once.
		 */
		map<CoverageDepth,LargeCount>::iterator match=array2->find(x1);

		if(match==array2->end())
			continue;

		int y2=match->second;

		#ifdef CONFIG_CALLER_VERBOSE_POINTS
		cout<<"POINT	"<<x1<<"	"<<y1<<"	"<<y2<<endl;
		#endif

		y1Values.push_back(y1);
		y2Values.push_back(y2);
	}

	double correlation=computeCorrelation(&y1Values,&y2Values);

	#ifdef CONFIG_CALLER_VERBOSE
	cout<<"Correlation computed on "<<y1Values.size()<<" points."<<endl;
	#endif

	/* the quality score is the absolute correlation coefficient */
	if(correlation<0)
		correlation=-correlation;

	return correlation;
}