File: fastaFormat.cpp

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (74 lines) | stat: -rw-r--r-- 2,017 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// $Id: fastaFormat.cpp 10280 2012-02-06 09:45:26Z itaymay $
#include "fastaFormat.h"
#include "someUtil.h"
#include "errorMsg.h"
#include "ConversionUtils.h"
#include <algorithm>
using namespace std;

// Reads FASTA text from infile as an alignment.
//
// The parsing itself is delegated to readUnAligned(); afterwards
// makeSureAllSeqAreSameLengthAndGetLen() is invoked on the result (its return
// value is not used here). NOTE(review): judging by its name, that call
// verifies that all sequences have equal length; how a mismatch is reported is
// not visible in this file.
//
// infile - stream holding the FASTA text.
// alph   - alphabet forwarded to readUnAligned().
// Returns the container produced by readUnAligned().
sequenceContainer fastaFormat::read(istream &infile, const alphabet* alph) {
	sequenceContainer alignedSeqs(readUnAligned(infile, alph));
	alignedSeqs.makeSureAllSeqAreSameLengthAndGetLen();
	return alignedSeqs;
}


// Parses FASTA text from infile into a sequenceContainer without checking
// that the sequences share a common length.
//
// Expected layout: a header line beginning with '>' (the remainder of that
// line is the sequence name), followed by lines of residues that are
// concatenated until the next header line or the end of the input. Blank
// lines before a header and between a header and its residues are skipped.
//
// infile - stream holding the FASTA text; it is handed to
//          putFileIntoVectorStringArray, which is expected to fill
//          seqFileData with the lines of the input.
// alph   - alphabet forwarded unchanged to every constructed sequence.
//
// Returns a container with one sequence per record, with ids 0,1,2,...
// assigned in input order. A header with no residue lines after it yields a
// sequence built from an empty string.
//
// Problems are reported through errorMsg::reportError: when no lines were
// read, and when a non-blank line is found where a header is expected.
sequenceContainer fastaFormat::readUnAligned(istream &infile, const alphabet* alph) {
	sequenceContainer mySeqData;

	vector<string> seqFileData;
	putFileIntoVectorStringArray(infile,seqFileData);
	if (seqFileData.empty()){
		errorMsg::reportError("unable to open file, or file is empty in fasta format");
	}

	int localid=0;
	vector<string>::const_iterator it1 = seqFileData.begin();
	while (it1 != seqFileData.end()) {
		if (it1->empty()) {++it1; continue;} // blank line before a record

		string remark; // never filled in by this reader; passed on empty
		string name;

		if ((*it1)[0] == '>') {
			// The line is non-empty here, so substr(1) is always in range;
			// it yields everything after the leading '>'.
			name = it1->substr(1);
			++it1;
		} else {
			LOG(0,<<"problem in line: "<<*it1<<endl);
			errorMsg::reportError("Error reading fasta file, error finding sequence name starting with >",1);
		}

		// Skip blank lines between the header and its residues. The end check
		// must come first: a header that is the last line of the input (or is
		// followed only by blank lines) would otherwise dereference end().
		while (it1 != seqFileData.end() && it1->empty()) ++it1;

		// Concatenate residue lines up to the next header or the end of input.
		string str;
		while (it1 != seqFileData.end()) {
			if (!it1->empty() && (*it1)[0] == '>') break;
			str += *it1;
			++it1;
		}

		// Remove spaces and tabs from the residues.
		str = takeCharOutOfString(" \t", str);
		// NOTE(review): trim() is defined elsewhere; presumably it strips
		// surrounding whitespace from the name.
		name = trim(name);
		mySeqData.add(sequence(str,name,remark,localid,alph));
		localid++;
	}

	return mySeqData;
}


// Writes every sequence of sd to out in FASTA form: a line holding '>'
// followed by the sequence's name(), then a line holding its toString().
// Each line is terminated with endl, so the stream is flushed after every line.
//
// out - destination stream.
// sd  - container whose sequences are visited from constTaxaBegin() to
//       constTaxaEnd().
void fastaFormat::write(ostream &out, const sequenceContainer& sd) {
	sequenceContainer::constTaxaIterator seqIt = sd.constTaxaBegin();
	while (seqIt != sd.constTaxaEnd()) {
		out << ">" << seqIt->name() << endl;
		out << seqIt->toString() << endl;
		++seqIt;
	}
}