File: decalc.h

package info (click to toggle)
mothur 1.24.1-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 7,868 kB
  • sloc: cpp: 110,948; ansic: 2,037; fortran: 665; makefile: 74; sh: 59
file content (80 lines) | stat: -rw-r--r-- 2,878 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#ifndef DECALC_H
#define DECALC_H
/*
 *  decalc.h
 *  Mothur
 *
 *  Created by Sarah Westcott on 7/22/09.
 *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
 *
 */

#include "mothur.h"
#include "sequence.hpp"

/***********************************************************************/

//This class was created using the algorithms described in the
// "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper
//by Kevin E. Ashelford, Nadia A. Chuzhanova, John C. Fry, Antonia J. Jones and Andrew J. Weightman.

/***********************************************************************/

//Pairs a score with the two members that produced it.
//Keeping the contributors next to the score makes it possible to tell which sequence
//contributed to an outlier, so that every score contributed by an outlier sequence can be removed.
struct quanMember {
	float score;	//the score for this pair
	int member1;	//first contributor (presumably a sequence index - confirm against the callers)
	int member2;	//second contributor (presumably a sequence index - confirm against the callers)

	//builds a fully specified entry: s -> score, m -> member1, n -> member2
	quanMember (float s, int m, int n) : score(s), member1(m),  member2(n) {}

	//zero-initializes every field so a default-constructed entry never holds indeterminate values
	quanMember() : score(0.0f), member1(0), member2(0) {}

};
		
//********************************************************************************************************************
class DeCalculator {

	public:
		
		DeCalculator() { m = MothurOut::getInstance(); }
		~DeCalculator() {};
		
		vector<Sequence> findClosest(Sequence, vector<Sequence*>&, vector<Sequence*>&, int, int);  //takes querySeq, a reference db, filteredRefDB, numWanted, minSim 
		Sequence* findClosest(Sequence*, vector<Sequence*>);
		set<int> getPos() {  return h;  }
		void setMask(string); 
		void setAlignmentLength(int l) {  alignLength = l;  }
		void runMask(Sequence*);
		void trimSeqs(Sequence*, Sequence*, map<int, int>&);
		map<int, int> trimSeqs(Sequence&, vector<Sequence>&);
		void removeObviousOutliers(vector< vector<float> >&, int);
		vector<float> calcFreq(vector<Sequence*>, string);
		vector<int> findWindows(Sequence*, int, int, int&, int);
		vector<float> calcObserved(Sequence*, Sequence*, vector<int>, int);
		vector<float>  calcExpected(vector<float>, float);
		vector<float>  findQav(vector<int>, int, vector<float>);  
		float calcDE(vector<float>, vector<float>);
		float calcDist(Sequence*, Sequence*, int, int);
		float getCoef(vector<float>, vector<float>);
		vector< vector<float> > getQuantiles(vector<Sequence*>, vector<int>, int, vector<float>, int, int, int);
		
		vector<int> returnObviousOutliers(vector< vector<quanMember> >, int);
		
		map<int, int> getMaskMap() { return maskMap; }
		
	private:
		//vector<quanMember> sortContrib(map<quanMember*, float>);  //used by mallard
		float findAverage(vector<float>);
		//int findLargestContrib(vector<int>);
		//void removeContrib(int, vector<quanMember>&);
		string seqMask;
		set<int> h;
		int alignLength;
		map<int, int> maskMap;
		MothurOut* m;

};

/***********************************************************************/

#endif