1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $
#include "phylipFormat.h"
#include "someUtil.h"
#include "errorMsg.h"
#include "logFile.h"
// Reads PHYLIP-formatted sequences from infile as an alignment.
// The parsing itself is delegated to readUnAligned(); the resulting container is
// then passed through makeSureAllSeqAreSameLengthAndGetLen() before it is returned.
//   infile - stream positioned at the start of the PHYLIP data
//   alph   - alphabet handed on to readUnAligned() for building the sequences
sequenceContainer phylipFormat::read(istream &infile, const alphabet* alph){
	sequenceContainer alignedSeqs = readUnAligned(infile, alph);

	// the returned length is not needed here; the call is made for its check only
	alignedSeqs.makeSureAllSeqAreSameLengthAndGetLen();

	return alignedSeqs;
}
// Parses PHYLIP-formatted text from infile into a sequenceContainer, without
// verifying that all sequences end up with the same length.
//
// Layout expected by this parser:
//   * the first line holds two integers: the number of sequences and the
//     sequence length;
//   * each of the next <number of sequences> non-empty lines holds a name
//     (everything up to the first space) followed by sequence characters
//     (spaces are dropped);
//   * every further non-empty line is a continuation. With i counting the
//     non-empty data lines seen so far (starting at 0), the line is appended to
//     the sequence whose id is (i % number of sequences).
// Empty lines are skipped and do not advance the line counter.
//
//   infile - stream to read the whole input from
//   alph   - alphabet passed to every sequence that is constructed
//
// Problems are reported through errorMsg::reportError: empty input, a header
// whose numbers cannot be read, and continuation lines when the declared number
// of sequences is not positive.
sequenceContainer phylipFormat::readUnAligned(istream &infile, const alphabet* alph){
	sequenceContainer mySeqData;

	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile,seqFileData);
	if (seqFileData.empty()) {
		// Without this guard the header parsing below would dereference
		// begin() of an empty vector.
		errorMsg::reportError("Error reading PHYLIP sequence format: the input is empty");
		return mySeqData;
	}

	vector<string>::const_iterator currentLinePosition = seqFileData.begin();

	// --- header line: "<number of sequences> <sequence length>" ---
	string::const_iterator itStr = currentLinePosition->begin();
	string::const_iterator itStrEnd = currentLinePosition->end();
	int f_numSeq;
	bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
	if (readSeqNum == false) errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format");
	int f_seqLength;
	bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
	if (readSeqLen == false) errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format");
	++currentLinePosition; // the header line has been consumed

	// --- data lines ---
	int localid=0; // number of non-empty data lines handled so far
	for (; currentLinePosition != seqFileData.end(); ++currentLinePosition) {
		const string& line = *currentLinePosition;
		if (line.empty()) continue; // blank lines carry no data

		if (mySeqData.numberOfSeqs() < f_numSeq ) {
			// First block: the line carries a name followed by sequence data.
			string::const_iterator it2 = line.begin();

			string name1; // characters up to (not including) the first space
			for (; it2 != line.end() && (*it2) != ' '; ++it2) {
				name1+=(*it2);
			}

			string stringSeq1; // the rest of the line with spaces removed
			for (; it2 != line.end(); ++it2) {
				if ((*it2) != ' ') stringSeq1+=(*it2);
			}

			string remark; // PHYLIP lines carry no remark; an empty one is stored
			mySeqData.add(sequence(stringSeq1,name1,remark,localid,alph));
		}
		else {
			// Continuation block: the line extends an already stored sequence.
			if (f_numSeq <= 0) {
				// The modulo below would divide by zero (or yield a
				// meaningless id) for a non-positive sequence count.
				errorMsg::reportError("Error reading PHYLIP sequence format: found sequence data but the declared number of sequences is not positive");
				return mySeqData;
			}
			const int sequenceId=localid%f_numSeq;

			// The stored sequence is not modified while the line is scanned, so
			// the length test is evaluated once. Note that it compares the
			// length *before* appending: a line is either taken whole or ignored.
			const bool targetNotFull = mySeqData[sequenceId].seqLen() < f_seqLength;

			string stringSeq1;
			if (targetNotFull) {
				for (string::const_iterator it2 = line.begin(); it2 != line.end(); ++it2) {
					if ((*it2) != ' ') stringSeq1+=(*it2);
				}
			}

			// When the target is already full stringSeq1 stays empty and an
			// empty sequence is appended.
			sequence tmp(stringSeq1,"","",sequenceId,alph);
			mySeqData[sequenceId] += tmp;
		}
		++localid;
	}
	return mySeqData;
}
// Writes the sequences of sd to out in interleaved PHYLIP layout.
//
// Output layout:
//   * a header "<number of sequences> <sequence length>";
//   * then blocks of numOfPositionInLine positions. Each block starts with a line
//     break, followed by one line per sequence: a 10-character name field, one
//     space, and the positions of that block;
//   * the name field holds the (truncated / space-padded) name in the first block
//     only; in later blocks it is 10 spaces;
//   * a space is written after every spaceEvery-th position, except at the end
//     of a line.
//
//   out                 - destination stream
//   sd                  - sequences to write
//   numOfPositionInLine - positions per line; must be positive when there is
//                         sequence data to write, otherwise an error is reported
//                         (a non-positive value could never advance through the data)
//   spaceEvery          - grouping width; 0 disables the grouping spaces
//
// If any name is longer than 10 characters a warning is logged, since truncated
// names may collide.
// NOTE(review): a sequence shorter than numOfPositionInLine is written in full in
// every block; this only differs from the block-wise output when sequences have
// unequal lengths - confirm whether callers ever pass such containers.
void phylipFormat::write(ostream &out, const sequenceContainer& sd,
		const int numOfPositionInLine,
		const int spaceEvery) {
	const int maxLengthOfSeqName = 10; // width of the PHYLIP name field

	// Look for a name that does not fit into the name field.
	sequenceContainer::constTaxaIterator longNameIt=sd.constTaxaBegin();
	for (;longNameIt!=sd.constTaxaEnd();++longNameIt) {
		if (static_cast<int>(longNameIt->name().size()) > maxLengthOfSeqName) break;
	}
	if (longNameIt != sd.constTaxaEnd()) {
		LOG(1,<<"you asked to print in phylip format\n");
		LOG(1,<<"however, the names in phylip format\n");
		LOG(1,<<"must be no more than 10 characters.\n");
		LOG(1,<<"Names are hence trancated to ten \n");
		LOG(1,<<"characters. Notice, that this might\n");
		LOG(1,<<"result in a two or more sequences \n");
		LOG(1,<<"having the same name \n");
	}

	out<<sd.numberOfSeqs()<<" "<<sd.seqLen();
	if (sd.constTaxaBegin()==sd.constTaxaEnd()) return;

	if (0 < sd.seqLen() && numOfPositionInLine <= 0) {
		// The block loop below advances by numOfPositionInLine; with a
		// non-positive step it would never reach the end of the sequences.
		errorMsg::reportError("Error writing PHYLIP sequence format: the number of positions per line must be positive");
		return;
	}

	int currentPosition = 0;
	while (currentPosition < sd.seqLen() ) {
		out<<endl; // endl also flushes, no separate flush() is needed

		// names are printed next to the first block only
		const bool firstBlock = (currentPosition<numOfPositionInLine);

		for (sequenceContainer::constTaxaIterator seqIt=sd.constTaxaBegin();seqIt!=sd.constTaxaEnd();++seqIt) {
			// --- name field: exactly maxLengthOfSeqName characters ---
			const string& seqName = seqIt->name();
			const int nameLen = static_cast<int>(seqName.size());
			for (int iName = 0 ;iName<maxLengthOfSeqName; ++iName) {
				if (firstBlock && iName<nameLen) out<<seqName[iName];
				else out<<" ";
			}
			out<<" ";

			// --- sequence data of this block ---
			if (seqIt->seqLen()<numOfPositionInLine) {
				out<<seqIt->toString()<<endl;
			}
			else {
				const int blockEnd = currentPosition+numOfPositionInLine;
				for (int k=currentPosition; k < blockEnd && k < seqIt->seqLen(); ++k) {
					out<<seqIt->toString(k);
					// spaceEvery == 0 means "no grouping"; testing it first
					// also keeps the modulo from dividing by zero
					if ((spaceEvery != 0) && ((k+1)%spaceEvery==0) && ((k+1)%numOfPositionInLine!=0)) out<<" ";
				}
				out<<endl;
			}
		}
		currentPosition +=numOfPositionInLine;
	}
	return;
}
|