// $Id: phylipFormat.cpp 962 2006-11-07 15:13:34Z privmane $
#include "phylipSequentialFormat.h"
#include "someUtil.h"
#include "errorMsg.h"
#include "logFile.h"
// Reads sequences in PHYLIP sequential format from infile.
// The parsing itself is delegated to readUnAligned(); the resulting container
// is then passed through makeSureAllSeqAreSameLengthAndGetLen() before being
// returned to the caller.
sequenceContainer phylipSequentialFormat::read(istream &infile, const alphabet* alph){
	sequenceContainer alignedSeqs = readUnAligned(infile, alph);
	alignedSeqs.makeSureAllSeqAreSameLengthAndGetLen();
	return alignedSeqs;
}
// Parses PHYLIP sequential-format data from infile and returns the sequences
// it finds. No check is made here that the sequences end up with equal
// lengths.
//
// Layout accepted by this parser:
//   - The first line holds two integers: the number of sequences followed by
//     the sequence length.
//   - Each record spans one or more lines. A line whose first character is not
//     a space contributes its leading run of non-space characters to the
//     sequence name and the remainder (spaces removed) to the sequence; a line
//     starting with a space contributes sequence characters only.
//   - Lines are consumed for a record until at least <sequence length>
//     characters have been collected. Empty lines between records are skipped.
//
// Sequences are added with consecutive ids starting at 0. The declared number
// of sequences is parsed but not compared with the number actually read.
//
// Problems are reported through errorMsg::reportError: empty input, an
// unreadable header, a negative length, or input that ends before a record
// reaches the declared length. Whether reportError returns is not visible
// here, so each report is followed by a return of what was parsed so far.
//
// NOTE(review): a continuation line that does not start with a space has its
// first token appended to the sequence name, not to the sequence. This is
// the pre-existing behaviour and is kept unchanged.
sequenceContainer phylipSequentialFormat::readUnAligned(istream &infile, const alphabet* alph){
	sequenceContainer mySeqData;

	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile,seqFileData);
	if (seqFileData.empty()) {
		// No header line at all: dereferencing begin() would be undefined.
		errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format");
		return mySeqData;
	}

	vector<string>::const_iterator currentLinePosition = seqFileData.begin();
	string::const_iterator itStr = currentLinePosition->begin();
	string::const_iterator itStrEnd = currentLinePosition->end();

	// Header: "<number of sequences> <sequence length>". The same iterator is
	// handed to both calls, so the second number is presumably read from where
	// the first call stopped.
	int f_numSeq = 0;
	bool readSeqNum= fromStringIterToInt(itStr,itStrEnd,f_numSeq);
	if (readSeqNum == false) {
		errorMsg::reportError("Error reading number of sequences while reading PHYLIP sequence format");
		return mySeqData;
	}
	int f_seqLength = 0;
	bool readSeqLen= fromStringIterToInt(itStr,itStrEnd,f_seqLength);
	if (readSeqLen == false || f_seqLength < 0) {
		// A negative length would turn into a huge unsigned bound below.
		errorMsg::reportError("Error reading the sequences length while reading PHYLIP sequence format");
		return mySeqData;
	}
	const string::size_type expectedLength = static_cast<string::size_type>(f_seqLength);
	++currentLinePosition; // we read the first line.

	int localid=0;
	while (currentLinePosition != seqFileData.end()) {
		if (currentLinePosition->empty()) {++currentLinePosition; continue;} // empty line between records

		string stringSeq1;
		string name1;
		// do/while: every record consumes at least one line, so the outer loop
		// always makes progress even when the declared length is 0.
		do {
			string::const_iterator it2 = currentLinePosition->begin();
			const string::const_iterator lineEnd = currentLinePosition->end();
			// A line that does not start with a space begins with (part of) the name.
			for (; it2 != lineEnd && (*it2)!=' '; ++it2) {
				name1+=(*it2);
			}
			// Whatever follows is sequence content; embedded spaces are dropped.
			for (; it2 != lineEnd; ++it2) {
				if ((*it2)!=' ') stringSeq1+=(*it2);
			}
			++currentLinePosition;
		} while (stringSeq1.length() < expectedLength && currentLinePosition != seqFileData.end());

		if (stringSeq1.length() < expectedLength) {
			// Ran out of lines before the record was complete.
			errorMsg::reportError("Error: unexpected end of input before a sequence reached the declared length while reading PHYLIP sequence format");
			return mySeqData;
		}
		mySeqData.add(sequence(stringSeq1,name1,"",localid,alph));
		localid++;
	}
	return mySeqData;
}
// Writes the sequences of sd to out in PHYLIP sequential format.
//
//   out                 - destination stream.
//   sd                  - sequences to write.
//   numOfPositionInLine - number of sequence positions printed per output
//                         line; must be positive.
//   spaceEvery          - a single space is inserted after every spaceEvery
//                         positions (except at the end of a line); 0 disables
//                         the grouping.
//
// Output layout: a header "<number of sequences> <sequence length>", then for
// each sequence a newline, the name padded or cut to 10 characters, one
// space, and the sequence broken into lines of numOfPositionInLine positions.
// Continuation lines are prefixed with spaceEvery+1 spaces.
//
// If any name is longer than 10 characters a warning is sent to the log,
// because truncation may produce duplicate names.
//
// A non-positive numOfPositionInLine is reported through
// errorMsg::reportError (the line-breaking loop could never terminate with
// it); nothing beyond the header is written in that case.
//
// NOTE(review): the continuation prefix of spaceEvery+1 spaces lines up with
// the 11-character name column only when spaceEvery is 10 - confirm whether
// that is intended. Left unchanged to keep the output identical.
void phylipSequentialFormat::write(ostream &out, const sequenceContainer& sd,
						 const int numOfPositionInLine,
						 const int spaceEvery) {
	// Width of the name column; longer names are cut to this many characters.
	const string::size_type maxLengthOfSeqName = 10;

	// Look for the first name that will not fit, only to decide whether to warn.
	sequenceContainer::constTaxaIterator longNameIt = sd.constTaxaBegin();
	for (; longNameIt != sd.constTaxaEnd(); ++longNameIt) {
		if (longNameIt->name().size() > maxLengthOfSeqName) break;
	}
	if (longNameIt != sd.constTaxaEnd()) {
		LOG(1,<<"you asked to print in phylip format\n");
		LOG(1,<<"however, the names in phylip format\n");
		LOG(1,<<"must be no more than 10 characters.\n");
		LOG(1,<<"Names are hence trancated to ten   \n");
		LOG(1,<<"characters. Notice, that this might\n");
		LOG(1,<<"result in a two or more sequences  \n");
		LOG(1,<<"having the same name               \n");
	}

	out<<sd.numberOfSeqs()<<"   "<<sd.seqLen();
	if (sd.constTaxaBegin()==sd.constTaxaEnd()) return;

	if (numOfPositionInLine <= 0) {
		// With a non-positive line width currentPosition never moves forward.
		errorMsg::reportError("Error in phylipSequentialFormat::write: numOfPositionInLine must be positive");
		return;
	}
	// spaceEvery == 0 would be a modulo by zero below; treat it as "no grouping".
	const bool insertGroupSpaces = (spaceEvery != 0);

	for (sequenceContainer::constTaxaIterator it5=sd.constTaxaBegin();it5!=sd.constTaxaEnd();++it5) {
		out<<endl;

		// first - print name of sequence, padded with spaces to the column width
		for (string::size_type iName = 0; iName < maxLengthOfSeqName; ++iName) {
			if (iName < it5->name().size()) out<<it5->name()[iName];
			else out<<" ";
		}
		out<<" ";

		// next - print sequence itself
		int currentPosition = 0;
		while (currentPosition < sd.seqLen() ) {
			if (it5->seqLen()<numOfPositionInLine) {
				// Short sequence: fits on a single line.
				out<<it5->toString()<<endl;
			}
			else {
				for (int k=currentPosition; k < currentPosition+numOfPositionInLine; ++k) {
					if (k>=it5->seqLen()) break;
					out<<it5->toString(k);
					if (insertGroupSpaces && ((k+1)%spaceEvery==0) && ((k+1)%numOfPositionInLine!=0)) out<<" ";
				}
				out<<endl;
				if (currentPosition+numOfPositionInLine < sd.seqLen()) {
					for (int i = 0; i < spaceEvery +1; i++) // creates spaces to align properly
						out << " ";
				}
			}
			currentPosition +=numOfPositionInLine;
		}
	}
}
|