File: molphyFormat.cpp

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (85 lines) | stat: -rw-r--r-- 2,586 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// 	$Id: molphyFormat.cpp 962 2006-11-07 15:13:34Z privmane $	
#include "molphyFormat.h"
#include "someUtil.h"
#include "errorMsg.h"

// Reads a MOLPHY-format stream into a sequenceContainer.
// The parsing itself is delegated to readUnAligned(); afterwards
// makeSureAllSeqAreSameLengthAndGetLen() is invoked on the result (its return
// value is not used here). Judging by its name, that call is what enforces
// that all sequences have the same length - confirm in sequenceContainer.
//   infile - stream holding the MOLPHY data.
//   alph   - alphabet passed through to readUnAligned().
sequenceContainer molphyFormat::read(istream &infile, const alphabet* alph) {
	sequenceContainer alignedSeqs = readUnAligned(infile, alph);
	alignedSeqs.makeSureAllSeqAreSameLengthAndGetLen();
	return alignedSeqs;
}
// Parses a MOLPHY-format stream into a sequenceContainer without checking
// that the resulting sequences have equal lengths.
//
// Layout as consumed by this parser:
//   * first line: two integers - the number of sequences and the sequence
//     length. The number of sequences is parsed (and must be readable) but is
//     not otherwise used; the length decides where one record ends.
//   * then, for each record: one line with the sequence name, followed by
//     data lines that are concatenated until at least <sequence length>
//     characters were collected, or the input ends.
//   Blank lines found where a name is expected are skipped.
//
//   infile - stream the whole file is read from.
//   alph   - alphabet handed to every sequence that is created.
// Returns a container holding one sequence per record; ids are assigned
// consecutively (0,1,2,...) in file order.
//
// Problems (empty input, unreadable or negative header values) are reported
// through errorMsg::reportError.
sequenceContainer molphyFormat::readUnAligned(istream &infile, const alphabet* alph) {

	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile,seqFileData);
	if (seqFileData.empty()){
		errorMsg::reportError("unable to open file, or file is empty in molphy format");
		// Only reached if reportError() returns: there is no header line to
		// parse, so never dereference begin() of the empty vector.
		return sequenceContainer();
	}

	// ---- header line: "<number of sequences> <sequence length>" ----
	string::const_iterator itStr = seqFileData.begin()->begin();
	string::const_iterator itStrEnd = seqFileData.begin()->end();

	int f_numSeq = 0;
	bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
	if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading MOLPHY sequence format");
	int f_seqLength = 0;
	bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
	// A negative length is rejected as well: compared against string::size()
	// it would wrap around to a huge unsigned value and the first record would
	// silently swallow every remaining line of the file.
	if (readSeqLen == false || f_seqLength < 0) errorMsg::reportError("Error reading the sequences length while reading MOLPHY sequence format");
	const string::size_type seqLength = static_cast<string::size_type>(f_seqLength);

	// ---- records ----
	sequenceContainer mySeqData;
	int localID = 0; // id of the next sequence to be added

	vector<string>::const_iterator it1 = seqFileData.begin();
	++it1; //skipping the first line that was read already.
	while (it1 != seqFileData.end()) {
		if (it1->empty()) {
			++it1;
			continue; // blank line between records - it must not consume an id
		}
		// read the name.
		const string name(*it1);
		++it1;

		// gather data lines until the declared length is reached (or input ends).
		string tmpString;
		while (it1 != seqFileData.end() && tmpString.size() < seqLength) {
			tmpString += *it1;
			++it1;
		}

		mySeqData.add(sequence(tmpString,name,"",localID,alph));
		++localID; // advance only after a sequence was actually stored
	}
	return mySeqData;
}




// Writes the container in MOLPHY format to 'out'.
// Output layout:
//   * header line: "<numberOfSeqs> <seqLen>"
//   * per sequence: a line with its name, then its toString() text broken
//     into lines of at most 60 characters. A sequence with empty text still
//     produces one (empty) line after its name.
//   out - destination stream.
//   sd  - container whose sequences are written, in iteration order.
void molphyFormat::write(ostream &out, const sequenceContainer& sd) {
	const string::size_type lineWidth = 60; // characters per sequence line

	out<<sd.numberOfSeqs()<<" "<<sd.seqLen()<<endl;

	sequenceContainer::constTaxaIterator taxon = sd.constTaxaBegin();
	while (taxon != sd.constTaxaEnd()) {
		out<<taxon->name()<<endl;

		const string residues = taxon->toString();
		for (string::size_type start = 0; start < residues.size(); start += lineWidth) {
			if (start > 0) out<<endl; // close the previous full line
			out<<residues.substr(start, lineWidth);
		}
		out<<endl; // terminates the last (possibly empty) sequence line

		++taxon;
	}
}