File: phylipSequentialFormat.cpp

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (130 lines) | stat: -rw-r--r-- 4,428 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $

#include "phylipSequentialFormat.h"
#include "someUtil.h"
#include "errorMsg.h"
#include "logFile.h"

// Reads PHYLIP sequential-format data from infile.
// The parsing itself is delegated to readUnAligned(); the resulting container
// is then passed through makeSureAllSeqAreSameLengthAndGetLen() before being
// returned (presumably this enforces a common sequence length -- confirm in
// sequenceContainer).
//   infile - stream holding the PHYLIP data.
//   alph   - alphabet forwarded unchanged to readUnAligned().
sequenceContainer phylipSequentialFormat::read(istream &infile, const alphabet* alph){
	sequenceContainer alignedSeqs(readUnAligned(infile, alph));
	alignedSeqs.makeSureAllSeqAreSameLengthAndGetLen();
	return alignedSeqs;
}
// Parses PHYLIP sequential-format text from infile into a sequenceContainer.
// No check is made here that the sequences end up with the same length.
//
// Layout understood by this parser:
//   - the first line holds two integers: the number of sequences and the
//     sequence length;
//   - a line whose first character is not a space contributes the characters
//     up to the first space to the sequence name, and the remaining non-space
//     characters to the sequence data;
//   - a line whose first character is a space contributes sequence data only;
//   - lines keep being consumed for the current sequence until at least
//     <sequence length> characters were collected;
//   - lines that are empty or consist only of spaces are skipped.
// Only ' ' is treated as a separator (tabs are not).
// NOTE(review): a continuation line that does not start with a space has its
// first token appended to the sequence name; this is the historical behaviour
// and is kept as is.
//
//   infile - stream read through putFileIntoVectorStringArray().
//   alph   - alphabet handed to every constructed sequence.
// Returns the container with the sequences in file order, ids 0,1,2,...
//
// errorMsg::reportError() is called for: empty input, an unreadable header,
// a non-positive sequence length, and data that ends before a sequence reached
// the declared length. reportError() is assumed not to return normally; the
// explicit returns after it only keep this function safe in case it does.
sequenceContainer phylipSequentialFormat::readUnAligned(istream &infile, const alphabet* alph){
	sequenceContainer mySeqData;

	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile,seqFileData);

	// Without this guard the header parsing below would dereference begin() of
	// an empty vector.
	if (seqFileData.empty()) {
		errorMsg::reportError("Error: empty input while reading PHYLIP sequence format");
		return mySeqData;
	}

	vector<string>::const_iterator currentLinePosition = seqFileData.begin();
	string::const_iterator itStr = currentLinePosition->begin();
	string::const_iterator itStrEnd = currentLinePosition->end();

	// The sequence count is parsed only to advance itStr to the length field.
	int f_numSeq = 0;
	if (!fromStringIterToInt(itStr,itStrEnd,f_numSeq)) {
		errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format");
		return mySeqData;
	}
	int f_seqLength = 0;
	if (!fromStringIterToInt(itStr,itStrEnd,f_seqLength)) {
		errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format");
		return mySeqData;
	}
	// A length of 0 would never consume a line (endless loop); a negative one
	// would turn into a huge unsigned value in the comparison below.
	if (f_seqLength <= 0) {
		errorMsg::reportError("Error: the sequences length must be positive while reading PHYLIP sequence format");
		return mySeqData;
	}
	const string::size_type expectedLength = static_cast<string::size_type>(f_seqLength);
	++currentLinePosition; // we read the first line.

	int localid=0;
	while (currentLinePosition != seqFileData.end()) {
		// skip empty / spaces-only lines between sequences
		if (currentLinePosition->find_first_not_of(' ') == string::npos) {
			++currentLinePosition;
			continue;
		}
		string stringSeq1;
		string name1;
		while (stringSeq1.length() < expectedLength) { // adding a new seq
			if (currentLinePosition == seqFileData.end()) {
				errorMsg::reportError("Error: unexpected end of data (sequence shorter than the declared length) while reading PHYLIP sequence format");
				return mySeqData;
			}
			const string& currentLine = *currentLinePosition;
			string::const_iterator it2 = currentLine.begin();
			// A line starting with a non-space character carries a name first.
			// (An empty line has begin()==end() and simply contributes nothing.)
			for (; it2 != currentLine.end() && (*it2) != ' '; ++it2) {
				name1+=(*it2);
			}
			// Whatever is left on the line is sequence data, spaces dropped.
			for (; it2 != currentLine.end(); ++it2) {
				if ((*it2) != ' ') stringSeq1+=(*it2);
			}
			++currentLinePosition;
		}
		mySeqData.add(sequence(stringSeq1,name1,"",localid,alph));
		++localid;
	}
	return mySeqData;
}

// Writes the sequences of sd to out in PHYLIP sequential format.
//
// Output layout:
//   - header: "<number of sequences>   <sequence length>";
//   - per sequence: a newline, the name cut or space-padded to exactly 10
//     characters, one space, then the sequence data;
//   - a sequence shorter than numOfPositionInLine is written whole on a single
//     line, without grouping spaces;
//   - otherwise it is written numOfPositionInLine positions per line, with a
//     space after every spaceEvery positions (except at a line end), and each
//     continuation line is indented by 11 spaces so that it lines up under the
//     data of the first line.
//
//   out                 - destination stream; flushed once before returning
//                         from the normal path.
//   sd                  - sequences to write.
//   numOfPositionInLine - positions per output line; must be positive,
//                         otherwise errorMsg::reportError() is called (the old
//                         code never terminated in that case).
//   spaceEvery          - group size for the separating spaces; 0 disables
//                         grouping (it used to divide by zero).
//
// Names longer than 10 characters are truncated; a warning is logged once.
// Continuation lines used to be indented by spaceEvery+1 spaces, which equals
// the 11 used now only when spaceEvery is 10.
void phylipSequentialFormat::write(ostream &out, const sequenceContainer& sd,
						 const int numOfPositionInLine,
						 const int spaceEvery) {
	if (numOfPositionInLine <= 0) {
		errorMsg::reportError("Error: the number of positions in a line must be positive while writing PHYLIP sequence format");
		return; // only reached if reportError() returns
	}

	const int maxLengthOfSeqName = 10; // width of the PHYLIP name column

	// Warn once if at least one name does not fit into the name column.
	bool someNameTooLong = false;
	for (sequenceContainer::constTaxaIterator itName=sd.constTaxaBegin();itName!=sd.constTaxaEnd();++itName) {
		if (itName->name().size() > static_cast<string::size_type>(maxLengthOfSeqName)) {
			someNameTooLong = true;
			break;
		}
	}
	if (someNameTooLong) {
		LOG(1,<<"you asked to print in phylip format\n");
		LOG(1,<<"however, the names in phylip format\n");
		LOG(1,<<"must be no more than 10 characters.\n");
		LOG(1,<<"Names are hence trancated to ten   \n");
		LOG(1,<<"characters. Notice, that this might\n");
		LOG(1,<<"result in a two or more sequences  \n");
		LOG(1,<<"having the same name               \n");
	}

	out<<sd.numberOfSeqs()<<"   "<<sd.seqLen();
	if (sd.constTaxaBegin()==sd.constTaxaEnd()) return;

	for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
		out<<'\n';
		// first - print name of sequence, cut / padded to the column width
		const string& seqName = it5->name();
		for (int iName = 0 ;iName<maxLengthOfSeqName; ++iName) {
			if (static_cast<string::size_type>(iName) < seqName.size()) out<<seqName[iName];
			else out<<' ';
		}
		out<<' ';
		// next - print sequence itself
		int currentPosition = 0;
		while (currentPosition < sd.seqLen() ) {
			if (it5->seqLen()<numOfPositionInLine) {
				out<<it5->toString()<<'\n';
				// The whole sequence is out; never print it a second time,
				// even if sd.seqLen() is larger than this sequence.
				break;
			}
			const int lineEnd = currentPosition+numOfPositionInLine;
			for (int k=currentPosition; k < lineEnd; ++k) {
				if (k>=it5->seqLen()) break;
				out<<it5->toString(k);
				// spaceEvery==0 means "no grouping" (and must not reach the modulo)
				if ((spaceEvery!=0) && ((k+1)%spaceEvery==0) && ((k+1)%numOfPositionInLine!=0)) out<<' ';
			}
			out<<'\n';
			if (lineEnd < sd.seqLen()) {
				// Indent the continuation line under the sequence data. The
				// leading space also tells the reader that the line carries no name.
				for (int i = 0; i < maxLengthOfSeqName+1; ++i) out<<' ';
			}
			currentPosition = lineEnd;
		}
	}
	out.flush();
}