File: seqerrorcommand.h

package info (click to toggle)
mothur 1.24.1-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 7,868 kB
  • sloc: cpp: 110,948; ansic: 2,037; fortran: 665; makefile: 74; sh: 59
file content (110 lines) | stat: -rw-r--r-- 3,017 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#ifndef SEQERRORCOMMAND
#define SEQERRORCOMMAND

/*
 *  seqerrorcommand.h
 *  Mothur
 *
 *  Created by Pat Schloss on 7/15/10.
 *  Copyright 2010 Schloss Lab. All rights reserved.
 *
 */

#include "mothur.h"
#include "command.hpp"
#include "sequence.hpp"
#include "referencedb.h"

/*
 *  Compare
 *
 *  Plain record of per-comparison tallies plus the names and text of the
 *  sequences involved. A default-constructed Compare has every counter at 0,
 *  empty strings, a weight of 1 and an errorRate of 1.0.
 *
 *  NOTE(review): the counter names look like reference/query base pairs
 *  (AA..NC), insertions (Ai..Ni) and deletions (dA..dC) -- confirm against
 *  the code that fills them in.
 */
struct Compare {
	// Two-letter counters, one per pair of base letters.
	int AA, AT, AG, AC;
	int TA, TT, TG, TC;
	int GA, GT, GG, GC;
	int CA, CT, CG, CC;
	int NA, NT, NG, NC;
	// Counters suffixed with 'i' and counters prefixed with 'd'.
	int Ai, Ti, Gi, Ci, Ni;
	int dA, dT, dG, dC;

	string refName;
	string queryName;
	string sequence;

	double errorRate;

	int weight;
	int matches;
	int mismatches;
	int total;

	// Initializers are listed in declaration order so the written order is
	// the order in which the members are actually initialized.
	Compare()
		: AA(0), AT(0), AG(0), AC(0),
		  TA(0), TT(0), TG(0), TC(0),
		  GA(0), GT(0), GG(0), GC(0),
		  CA(0), CT(0), CG(0), CC(0),
		  NA(0), NT(0), NG(0), NC(0),
		  Ai(0), Ti(0), Gi(0), Ci(0), Ni(0),
		  dA(0), dT(0), dG(0), dC(0),
		  refName(""), queryName(""), sequence(""),
		  errorRate(1.0000),
		  weight(1), matches(0), mismatches(0), total(0)
	{}
};

/*
 *  SeqErrorCommand
 *
 *  Command subclass registered under the name "seq.error" in the
 *  "Sequence Processing" category. Only the trivial accessors are defined
 *  inline; every other member function is declared here and defined outside
 *  this header.
 */
class SeqErrorCommand : public Command {
public:
	SeqErrorCommand(string);
	SeqErrorCommand();
	~SeqErrorCommand() {}

	vector<string> setParameters();

	string getCommandName() {
		return "seq.error";
	}

	string getCommandCategory() {
		return "Sequence Processing";
	}

	string getHelpString();

	string getCitation() {
		return "http://www.mothur.org/wiki/Seq.error";
	}

	string getDescription() {
		return "seq.error";
	}

	int execute();

	// Sends the help text to m->mothurOut(); `m` is not declared in this
	// header (presumably inherited from Command -- confirm).
	void help() {
		string helpText = getHelpString();
		m->mothurOut(helpText);
	}

private:
	// A pair of unsigned 64-bit values named start and end.
	// NOTE(review): presumably file offsets bounding one process's share of
	// an input file -- confirm against setLines()/driver().
	struct linePair {
		unsigned long long start;
		unsigned long long end;
		linePair(unsigned long long first, unsigned long long last) : start(first), end(last) {}
	};

	// ---- helpers (defined outside this header) ----
	void getReferences();
	map<string,int> getWeights();
	Compare getErrors(Sequence, Sequence);
	void printErrorHeader(ofstream&);
	void printErrorData(Compare, int, ofstream&, ofstream&);
	void printSubMatrix();
	void printErrorFRFile(map<char, vector<int> >, map<char, vector<int> >);
	void printErrorQuality(map<char, vector<int> >);
	void printQualityFR(vector<vector<int> >, vector<vector<int> >);

	int setLines(string, string, string, vector<unsigned long long>&, vector<unsigned long long>&, vector<unsigned long long>&);
	int driver(string, string, string, string, string, string, linePair, linePair, linePair);
	int createProcesses(string, string, string, string, string, string);

	// ---- state (same relative order as the original declaration) ----
	bool abort;
	ReferenceDB* rdb;

	vector<int> processIDS;   //processid
	vector<linePair> lines;
	vector<linePair> qLines;
	vector<linePair> rLines;

	string queryFileName;
	string referenceFileName;
	string qualFileName;
	string reportFileName;
	string namesFileName;
	string outputDir;

	double threshold;

	bool ignoreChimeras;
	bool save;

	int numRefs;
	int processors;

	int maxLength;
	int totalBases;
	int totalMatches;

	//ofstream errorSummaryFile, errorSeqFile;
	vector<string> outputNames;

	vector<Sequence> referenceSeqs;
	vector<vector<int> > substitutionMatrix;
	vector<vector<int> > qualForwardMap;
	vector<vector<int> > qualReverseMap;
	vector<int> misMatchCounts;
	map<char, vector<int> > qScoreErrorMap;
	map<char, vector<int> > errorForward;
	map<char, vector<int> > errorReverse;
	map<string, int> weights;
	vector<string> megaAlignVector;

};

#endif