File: indelCoder.h

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (75 lines) | stat: -rw-r--r-- 2,183 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#ifndef ___INDELCODER_
#define ___INDELCODER_

#include "gaps.h"
#include "character.h"
#include "indelCoderOptions.h"

#include "amino.h"
#include "sequenceContainer.h"
#include "recognizeFormat.h"

#include "logFile.h"
#include "talRandom.h"
#include <ctime>
#include <vector>

using namespace std;
// Implementation of the indel coding scheme MCIC (Muller),
// written for "Large-scale parsimony analysis of metazoan indels in protein-coding genes"
// (parsimony tree reconstruction using indel information - supporting the Ecdysozoa hypothesis - sponges evolutionarily close to animals).
// In that analysis, the only alignment gaps coded as gaps were those shorter than 50 amino acids that did not start at the N-terminus or end at the C-terminus of the alignment.
//
//
// Simple Indel Coding (SIC) (Simmons and Ochoterena 2000):
// each indel receives a separate two-state character of presence/absence.
// Any overlapping indels that exceed the boundaries of this indel are scored as missing data for that indel character.
//
// Modified Complex Indel Coding (MCIC) (Muller 2006):
// MCIC differs from SIC only in the treatment of overlapping indels.
// It uses multistate characters to code overlapping indels and assigns a distinct symmetrical step matrix to those gaps.

// Note:
// (*) Implemented for amino-acid sequences. (Later, a parameter for the alphabet type can be added.)


class indelCoder {

public:
	explicit indelCoder(){};
	virtual ~indelCoder(){};

	void startSequenceContainer();
	void readSequenceIntoGaps();
	//void delimitationOfCharacters();
	void delimitationOfCharacters(indelCoderOptions::codingType type);
	void delimitationOfCharactersSIC();
	//void delimitationOfCharactersMCIC2();

	void determinationCharacterState();
	void determinationCharacterStateSIC();

	void determinationStepsMatrix();
	void printCharacters();
	void printNexus();
	void printFasta();
	void printGapsInfo();
	void printIndelSummary(); 
	void run();

private:
	sequenceContainer _sc;
	vector< vector<int> > _matrix;
	gaps _gaps;
	gaps _unknowns;
	vector<gaps> _gapsVperSc;
	vector<character*> _characters;
};






#endif