File: mcf_fasta_sequence.cc

package info (click to toggle)
tantan 23-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 492 kB
  • sloc: cpp: 2,062; sh: 58; makefile: 39
file content (113 lines) | stat: -rw-r--r-- 2,984 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// Copyright 2010, 2011 Martin C. Frith

#include "mcf_fasta_sequence.hh"

#include <cctype>  // isspace
//#include <iostream>  // for debugging
#include <istream>
#include <iterator>  // istreambuf_iterator, ostreambuf_iterator
#include <ostream>

namespace mcf {

static void readSequence(std::istream &s, std::vector<uchar> &sequence,
                         char delimiter) {
  if (!s) return;
  std::istreambuf_iterator<char> inpos(s);
  std::istreambuf_iterator<char> endpos;
  while (inpos != endpos) {
    char c = *inpos;
    if (c == delimiter) break;
    uchar u = static_cast<uchar>(c);
    if (!std::isspace(u)) sequence.push_back(u);
    ++inpos;
  }
}

static void readQualityCodes(std::istream &s, std::vector<uchar> &qualityCodes,
                             std::vector<uchar>::size_type howMany) {
  if (!s) return;
  std::istreambuf_iterator<char> inpos(s);
  std::istreambuf_iterator<char> endpos;
  while (inpos != endpos) {
    if (qualityCodes.size() == howMany) break;
    char c = *inpos;
    uchar u = static_cast<uchar>(c);
    if (!std::isspace(u)) qualityCodes.push_back(u);
    ++inpos;
  }
}

std::istream &operator>>(std::istream &s, FastaSequence &f) {
  std::string title;
  std::vector<uchar> sequence;
  std::string secondTitle;
  std::vector<uchar> qualityCodes;

  char firstChar = '>';
  s >> firstChar;
  if (firstChar != '>' && firstChar != '@') s.setstate(std::ios::failbit);
  getline(s, title);

  if (firstChar == '>') {
    readSequence(s, sequence, '>');
  } else {
    readSequence(s, sequence, '+');
    char secondChar;
    s >> secondChar;
    getline(s, secondTitle);
    readQualityCodes(s, qualityCodes, sequence.size());
    // perhaps check whether we read enough quality codes
  }

  if (!s) return s;

  f.title.swap(title);
  f.sequence.swap(sequence);
  f.secondTitle.swap(secondTitle);
  f.qualityCodes.swap(qualityCodes);

  return s;
}

static void writeOneLine(std::ostream &s, const std::vector<uchar> &v) {
  std::ostreambuf_iterator<char> o(s);
  std::vector<uchar>::const_iterator b = v.begin();
  std::vector<uchar>::const_iterator e = v.end();
  while (b != e) {
    o = static_cast<char>(*b);
    ++b;
  }
  o = '\n';
}

static void writeMultiLines(std::ostream &s, const std::vector<uchar> &v) {
  const int lettersPerLine = 50;  // ?
  std::ostreambuf_iterator<char> o(s);
  std::vector<uchar>::const_iterator b = v.begin();
  std::vector<uchar>::const_iterator e = v.end();
  std::vector<uchar>::const_iterator i = b;
  while (i != e) {
    o = static_cast<char>(*i);
    ++i;
    if (i - b == lettersPerLine || i == e) {
      o = '\n';
      b = i;
    }
  }
}

std::ostream &operator<<(std::ostream &s, const FastaSequence &f) {
  if (f.qualityCodes.empty()) {
    s << '>' << f.title << '\n';
    writeMultiLines(s, f.sequence);
  } else {
    s << '@' << f.title << '\n';
    writeOneLine(s, f.sequence);
    s << '+' << f.secondTitle << '\n';
    writeOneLine(s, f.qualityCodes);
  }
  return s;
}

}