// $Id: fastaFormat.cpp 10280 2012-02-06 09:45:26Z itaymay $
#include "fastaFormat.h"
#include "someUtil.h"
#include "errorMsg.h"
#include "ConversionUtils.h"
#include <algorithm>
using namespace std;
// Reads a FASTA stream that is expected to hold aligned sequences.
//
// Parsing is delegated to readUnAligned(); afterwards the container is asked
// to run makeSureAllSeqAreSameLengthAndGetLen() (presumably this reports an
// error when the sequences differ in length -- confirm in sequenceContainer).
//
// infile : stream holding the FASTA text.
// alph   : alphabet forwarded unchanged to readUnAligned().
// Returns the populated sequence container.
sequenceContainer fastaFormat::read(istream &infile, const alphabet* alph) {
	sequenceContainer alignedSeqs = readUnAligned(infile, alph);
	// The returned length is deliberately ignored; the call is made for its check.
	alignedSeqs.makeSureAllSeqAreSameLengthAndGetLen();
	return alignedSeqs;
}
// Parses FASTA text from `infile` into a sequenceContainer without requiring
// the sequences to share a common length.
//
// Expected layout: a header line starting with '>' (the rest of the line is
// the sequence name), followed by one or more lines of sequence data that are
// concatenated until the next header or the end of the input. Empty lines are
// skipped. Spaces and tabs are removed from the sequence data, and the name is
// passed through trim() (presumably whitespace trimming -- see someUtil).
//
// infile : stream holding the FASTA text.
// alph   : alphabet handed to every constructed `sequence`.
// Returns a container with one entry per header, with ids 0,1,2,... in file order.
//
// Errors are reported via errorMsg::reportError when the input yields no lines
// or when a non-empty line appears where a '>' header is expected.
sequenceContainer fastaFormat::readUnAligned(istream &infile, const alphabet* alph) {
	sequenceContainer mySeqData;

	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile, seqFileData);
	if (seqFileData.empty()) {
		errorMsg::reportError("unable to open file, or file is empty in fasta format");
	}

	int localid = 0;
	vector<string>::const_iterator it1 = seqFileData.begin();
	while (it1 != seqFileData.end()) {
		if (it1->empty()) { // empty line between records: skip it
			++it1;
			continue;
		}

		string remark; // FASTA carries no remark; an empty one is passed on
		string name;
		if ((*it1)[0] == '>') {
			// Everything after the leading '>' is the (untrimmed) name.
			name = it1->substr(1);
			++it1;
		} else {
			LOG(0,<<"problem in line: "<<*it1<<endl);
			errorMsg::reportError("Error reading fasta file, error finding sequence name starting with >",1);
		}

		// Skip empty lines after the header. The end() check is essential:
		// a header on the last line (or followed only by blank lines) would
		// otherwise dereference a past-the-end iterator.
		while (it1 != seqFileData.end() && it1->empty()) {
			++it1;
		}

		// Concatenate data lines up to the next header or the end of input.
		string str;
		while (it1 != seqFileData.end()) {
			if (!it1->empty() && (*it1)[0] == '>') break;
			str += *it1;
			++it1;
		}

		// remove spaces and tabs from str
		str = takeCharOutOfString(" \t", str);
		name = trim(name);
		mySeqData.add(sequence(str, name, remark, localid, alph));
		++localid;
	}
	return mySeqData;
}
// Writes every sequence of `sd` to `out` in FASTA layout: a ">name" header
// line followed by one line holding the sequence's toString() text.
//
// out : destination stream; each line is terminated with endl.
// sd  : container whose taxa are written in iteration order.
void fastaFormat::write(ostream &out, const sequenceContainer& sd) {
	sequenceContainer::constTaxaIterator seqIt = sd.constTaxaBegin();
	while (seqIt != sd.constTaxaEnd()) {
		out << ">" << seqIt->name() << endl;   // header line
		out << seqIt->toString() << endl;      // sequence data on a single line
		++seqIt;
	}
}
|