File: sequence.hpp

package info (click to toggle)
libbiosoup-dev 0.11.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 168 kB
  • sloc: cpp: 646; makefile: 12
file content (89 lines) | stat: -rw-r--r-- 2,547 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// Copyright (c) 2020 Robert Vaser

#ifndef BIOSOUP_SEQUENCE_HPP_
#define BIOSOUP_SEQUENCE_HPP_

#include <algorithm>
#include <atomic>
#include <cctype>
#include <cstdint>
#include <string>

namespace biosoup {

// A named biological sequence with optional per-base quality scores.
//
// Every constructor except the default one draws a fresh `id` from the shared
// `num_objects` counter. Copies and moves keep the id of their source.
struct Sequence {
 public:
  // Creates an empty sequence whose id is 0. Does not touch `num_objects`.
  Sequence() = default;

  // Builds a sequence without quality scores from `name` and `data`.
  // The strings are copied directly (rather than through the 32-bit length
  // overload) so that inputs of 4 GiB or more are not silently truncated.
  Sequence(const std::string& name, const std::string& data)
      : id(num_objects++),
        name(name),
        data(data),
        quality() {}

  // Builds a sequence without quality scores from raw buffers.
  // Exactly `name_len` / `data_len` characters are copied from each buffer.
  Sequence(
      const char* name, std::uint32_t name_len,
      const char* data, std::uint32_t data_len)
      : id(num_objects++),
        name(name, name_len),
        data(data, data_len),
        quality() {}

  // Builds a sequence with quality scores from `name`, `data` and `quality`.
  // The strings are copied directly to avoid 32-bit length truncation.
  Sequence(
      const std::string& name,
      const std::string& data,
      const std::string& quality)
      : id(num_objects++),
        name(name),
        data(data),
        quality(quality) {}

  // Builds a sequence with quality scores from raw buffers.
  // Exactly `*_len` characters are copied from each buffer.
  Sequence(
      const char* name, std::uint32_t name_len,
      const char* data, std::uint32_t data_len,
      const char* quality, std::uint32_t quality_len)
      : id(num_objects++),
        name(name, name_len),
        data(data, data_len),
        quality(quality, quality_len) {}

  Sequence(const Sequence&) = default;
  Sequence& operator=(const Sequence&) = default;

  Sequence(Sequence&&) = default;
  Sequence& operator=(Sequence&&) = default;

  ~Sequence() = default;

  // (optional) Watson-Crick base pairing
  //
  // Replaces `data` with its reverse complement in place and reverses
  // `quality` so that the scores stay aligned with their bases.
  //
  // Letters are matched case-insensitively. A letter that gets complemented
  // is always written back in upper case; S, W and every unrecognised
  // character (e.g. N or -) are left exactly as they were, including case.
  void ReverseAndComplement() {
    for (auto& it : data) {
      // The cast to unsigned char keeps std::toupper well-defined for
      // characters with a negative value.
      switch (static_cast<char>(std::toupper(static_cast<unsigned char>(it)))) {
        case 'A': it = 'T'; break;
        case 'C': it = 'G'; break;
        case 'G': it = 'C'; break;
        case 'T': case 'U': it = 'A'; break;
        case 'R': it = 'Y'; break;  // A || G
        case 'Y': it = 'R'; break;  // C || T (U)
        case 'K': it = 'M'; break;  // G || T (U)
        case 'M': it = 'K'; break;  // A || C
        case 'S': break;  // C || G, its own complement
        case 'W': break;  // A || T (U), its own complement
        case 'B': it = 'V'; break;  // !A
        case 'D': it = 'H'; break;  // !C
        case 'H': it = 'D'; break;  // !G
        case 'V': it = 'B'; break;  // !T (!U)
        default: break;  // N || -
      }
    }
    std::reverse(data.begin(), data.end());
    std::reverse(quality.begin(), quality.end());
  }

  // Shared counter handing out ids to newly constructed sequences.
  // This is only a declaration: no definition is visible in this header, so
  // one translation unit has to define it, (optional) initializing it to 0.
  static std::atomic<std::uint32_t> num_objects;

  // Value of `num_objects` at construction time; 0 for a default-constructed
  // object (initialized here so it is never left indeterminate).
  std::uint32_t id = 0;
  std::string name;
  std::string data;
  std::string quality;  // (optional) Phred quality scores
};

}  // namespace biosoup

#endif  // BIOSOUP_SEQUENCE_HPP_