1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
#ifndef FASTAREADER_H
#define FASTAREADER_H 1
#include "Common/Sequence.h"
#include "Common/StringUtil.h" // for chomp
#include <cassert>
#include <cstdlib> // for exit
#include <fstream>
#include <istream>
#include <limits> // for numeric_limits
#include <ostream>
/**
 * Read a FASTA, FASTQ, export, qseq or SAM file.
 *
 * Records are extracted one at a time with read() or with the
 * stream-style operator>> overloads. The constructor, read(), split(),
 * die(), isChaste() and checkSeqQual() are defined out of line.
 */
class FastaReader {
public:
	/** Bit values for the parsing options stored in m_flags. */
	enum {
		/** Fold lower-case characters to upper-case. */
		FOLD_CASE = 0, NO_FOLD_CASE = 1,
		/** Convert to standard quality. */
		NO_CONVERT_QUALITY = 0, CONVERT_QUALITY = 2
	};

	/** Return whether case folding is enabled, i.e. whether the
	 * NO_FOLD_CASE bit of the flags is clear. */
	bool flagFoldCase() const
	{
		return (m_flags & NO_FOLD_CASE) == 0;
	}

	/** Return whether the CONVERT_QUALITY bit of the flags is set. */
	bool flagConvertQual() const
	{
		return (m_flags & CONVERT_QUALITY) != 0;
	}

	/**
	 * Construct a reader (defined out of line).
	 * NOTE(review): presumably path names the input, flags is a
	 * combination of the enumerators above and len is the maximum
	 * sequence length (0 for unlimited) -- confirm against the
	 * definition.
	 */
	FastaReader(const char* path, int flags, int len = 0);

	/**
	 * Check that the input was consumed completely. If the stream is
	 * not at end-of-file, the next line is reported through die() and
	 * the process exits with EXIT_FAILURE.
	 */
	~FastaReader()
	{
		if (m_in.eof())
			return;
		std::string line;
		getline(line);
		die() << "expected end-of-file near\n"
			<< line << '\n';
		exit(EXIT_FAILURE);
	}

	/**
	 * Read the next record (defined out of line).
	 * @param id      receives the identifier
	 * @param comment receives the header comment
	 * @param anchor  receives the anchor base
	 * @param qual    receives the quality string
	 * @return the sequence of the record
	 * NOTE(review): parameter roles are inferred from how FastaRecord
	 * and FastqRecord call this function -- confirm against the
	 * definition.
	 */
	Sequence read(std::string& id, std::string& comment,
			char& anchor, std::string& qual);

	/** Split the fasta file into nsections and seek to the start
	 * of section. */
	void split(unsigned section, unsigned nsections);

	/** Return whether this stream is at end-of-file. */
	bool eof() const { return m_in.eof(); }

	/** Return true if failbit or badbit of stream is set. */
	bool fail() const { return m_in.fail(); }

	/** Return whether this stream is good: a non-null pointer unless
	 * failbit or badbit is set. */
	operator const void*() const { return m_in.fail() ? NULL : this; }

	/** Return the next character of this stream. */
	int peek() { return m_in.peek(); }

	/** Interface for manipulators: apply f (e.g. std::ws) to the
	 * underlying stream. */
	FastaReader& operator>>(std::istream& (*f)(std::istream&))
	{
		f(m_in);
		return *this;
	}

	/** Returns the number of unchaste reads. */
	unsigned unchaste() const { return m_unchaste; }

	/** Read the next record and keep only its sequence; the
	 * identifier, comment, anchor and quality are discarded. */
	FastaReader& operator >>(Sequence& seq)
	{
		std::string id, comment, qual;
		// Initialized so the discarded value is never indeterminate.
		char anchor = 0;
		seq = this->read(id, comment, anchor, qual);
		return *this;
	}

private:
	/**
	 * Read a single line. On success the line count is incremented
	 * and chomp(s, '\r') is applied (presumably removing a trailing
	 * carriage return from DOS-style line endings).
	 */
	std::istream& getline(std::string& s)
	{
		if (std::getline(m_in, s)) {
			chomp(s, '\r');
			m_line++;
		}
		return m_in;
	}

	/**
	 * Ignore the specified number of lines. The line count is
	 * incremented once for every successful skip.
	 */
	std::istream& ignoreLines(unsigned n)
	{
		for (unsigned i = 0; i < n; ++i) {
			// Once the stream has failed every further ignore() is
			// a no-op, so stopping here leaves the same state.
			if (!m_in.ignore(
					std::numeric_limits<std::streamsize>::max(),
					'\n'))
				break;
			m_line++;
		}
		return m_in;
	}

	/** Return the stream used for fatal diagnostics (defined out of
	 * line). */
	std::ostream& die();
	bool isChaste(const std::string& s, const std::string& line);
	void checkSeqQual(const std::string& s, const std::string& q);

	/** NOTE(review): presumably the path given to the constructor;
	 * only the pointer is stored -- verify the caller keeps it alive. */
	const char* m_path;
	std::ifstream m_fin;
	/** The stream all reads go through. NOTE(review): presumably bound
	 * by the constructor to m_fin or another stream -- confirm. */
	std::istream& m_in;
	/** Flags indicating parsing options. */
	int m_flags;
	/** Number of lines read. */
	unsigned m_line;
	/** Count of unchaste reads. */
	unsigned m_unchaste;
	/** Position of the end of the current section. */
	std::streampos m_end;
	/** Trim sequences to this length. 0 is unlimited. */
	const int m_maxLength;
};
/** A FASTA record. */
struct FastaRecord
{
/** Identifier */
std::string id;
/** Comment following the first white-space of the header */
std::string comment;
/** Anchor base for a colour-space sequence */
char anchor;
/** The sequence */
Sequence seq;
FastaRecord() : anchor(0) { }
FastaRecord(const std::string& id, const std::string& comment,
const Sequence& seq)
: id(id), comment(comment), anchor(0), seq(seq) { }
operator Sequence() const { return seq; }
FastaRecord& operator=(const std::string& s)
{
seq = s;
return *this;
}
size_t size() const { return seq.size(); }
friend FastaReader& operator >>(FastaReader& in, FastaRecord& o)
{
std::string q;
o.seq = in.read(o.id, o.comment, o.anchor, q);
return in;
}
friend std::ostream& operator <<(std::ostream& out,
const FastaRecord& o)
{
out << '>' << o.id;
if (!o.comment.empty())
out << ' ' << o.comment;
return out << '\n' << o.seq << '\n';
}
};
/** A FASTQ record. */
struct FastqRecord : FastaRecord
{
/** Quality */
std::string qual;
FastqRecord() { }
FastqRecord(const std::string& id, const std::string& comment,
const Sequence& seq, const std::string& qual)
: FastaRecord(id, comment, seq), qual(qual)
{
assert(seq.length() == qual.length());
}
friend FastaReader& operator >>(FastaReader& in, FastqRecord& o)
{
o.seq = in.read(o.id, o.comment, o.anchor, o.qual);
return in;
}
friend std::ostream& operator <<(std::ostream& out,
const FastqRecord& o)
{
if (o.qual.empty())
return out << static_cast<const FastaRecord&>(o);
out << '@' << o.id;
if (!o.comment.empty())
out << ' ' << o.comment;
return out << '\n' << o.seq << "\n"
"+\n" << o.qual << '\n';
}
};
#endif //FASTAREADER_H
|