File: LibraryTypeDetector.hpp

package info (click to toggle)
salmon 1.10.3%2Bds1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 35,148 kB
  • sloc: cpp: 200,707; ansic: 171,082; sh: 859; python: 792; makefile: 238
file content (163 lines) | stat: -rw-r--r-- 5,676 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#ifndef __LIBRARY_TYPE_DETECTOR__
#define __LIBRARY_TYPE_DETECTOR__

#include "spdlog/fmt/fmt.h"
#include "spdlog/fmt/ostr.h"
#include "spdlog/sinks/ostream_sink.h"
#include "spdlog/spdlog.h"

#include "LibraryFormat.hpp"

#include <atomic>
#include <mutex>

class LibraryTypeDetector {
public:
  LibraryTypeDetector(ReadType type)
      : type_(type), libTypeCounts_(std::vector<std::atomic<uint64_t>>(
                         LibraryFormat::maxLibTypeID() + 1)) {}

  LibraryTypeDetector(const LibraryTypeDetector& other) {
    active_.store(other.active_.load());
    type_ = other.type_;
    numSamplesNeeded_.store(other.numSamplesNeeded_.load());
    libTypeCounts_ =
        std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1);
    for (size_t i = 0; i < other.libTypeCounts_.size(); ++i) {
      libTypeCounts_[i] = other.libTypeCounts_[i].load();
    }
  }

  bool isActive() { return active_.load(); }
  bool canGuess() { return numSamplesNeeded_ <= 0; }

  bool mostLikelyType(LibraryFormat& ifmt) {
    bool ret{false};
    if (mut_.try_lock()) {
      if (active_.load()) {
        if (type_ == ReadType::SINGLE_END) {
          uint64_t nf{0};
          uint64_t nr{0};
          for (size_t i = 0; i <= LibraryFormat::maxLibTypeID(); ++i) {
            auto f = LibraryFormat::formatFromID(i);
            auto c = libTypeCounts_[i].load();
            nf += (f.strandedness == ReadStrandedness::S) ? c : 0;
            nr += (f.strandedness == ReadStrandedness::A) ? c : 0;
          }
          double ratio = -1.0;
          if (nf + nr > 0) {
            ratio = static_cast<double>(nf) / (nf + nr);
          }

          ifmt.type = type_;
          ifmt.orientation = ReadOrientation::NONE;

          // If we have some issue computing this, leave as unstranded
          if (ratio < 0.0) {
            ifmt.strandedness = ReadStrandedness::U;
          } else if (ratio < 0.3) {
            // If we map to the forward strand < 30% of the time, we are
            // antisesnse
            ifmt.strandedness = ReadStrandedness::A;
          } else if (ratio < 0.7) {
            // Between 30% and 70% of the time, we are unstranded
            ifmt.strandedness = ReadStrandedness::U;
          } else {
            // Greater than 70% of the time, we are sense
            ifmt.strandedness = ReadStrandedness::S;
          }
        } else { // paired end
          uint64_t nsf{0};
          uint64_t nsr{0};

          uint64_t ninward{0};
          uint64_t noutward{0};
          uint64_t nsame{0};
          for (size_t i = 0; i <= LibraryFormat::maxLibTypeID(); ++i) {
            auto f = LibraryFormat::formatFromID(i);
            auto c = libTypeCounts_[i].load();
            nsf += (f.strandedness == ReadStrandedness::S or
                    f.strandedness == ReadStrandedness::SA)
                       ? c
                       : 0;
            nsr += (f.strandedness == ReadStrandedness::A or
                    f.strandedness == ReadStrandedness::AS)
                       ? c
                       : 0;
            ninward += (f.orientation == ReadOrientation::TOWARD) ? c : 0;
            noutward += (f.orientation == ReadOrientation::AWAY) ? c : 0;
            nsame += (f.orientation == ReadOrientation::SAME) ? c : 0;
          }

          ifmt.type = type_;
          if ((ninward + noutward + nsame > 0) and (nsf + nsr > 0)) {
            auto numOrient = ninward + noutward + nsame;
            double ratioIn = static_cast<double>(ninward) / numOrient;
            double ratioOut = static_cast<double>(noutward) / numOrient;
            double ratioSame = static_cast<double>(nsame) / numOrient;

            ifmt.orientation = ReadOrientation::NONE;
            bool same{false};

            if (ratioIn >= ratioOut and ratioIn >= ratioSame) {
              ifmt.orientation = ReadOrientation::TOWARD;
            } else if (ratioOut >= ratioIn and ratioOut >= ratioSame) {
              ifmt.orientation = ReadOrientation::AWAY;
            } else {
              ifmt.orientation = ReadOrientation::SAME;
              same = true;
            }

            auto numStrand = nsf + nsr;
            double ratioFW = static_cast<double>(nsf) / numStrand;
            if (ratioFW < 0.3) {
              ifmt.strandedness =
                  (same) ? ReadStrandedness::A : ReadStrandedness::AS;
            } else if (ratioFW < 0.7) {
              ifmt.strandedness = ReadStrandedness::U;
            } else {
              ifmt.strandedness =
                  (same) ? ReadStrandedness::S : ReadStrandedness::SA;
            }

          } else {
            ifmt.orientation = ReadOrientation::TOWARD;
            ifmt.strandedness = ReadStrandedness::U;
          }
        } // end paired-end

        auto log = spdlog::get("jointLog");
        log->info("Automatically detected most likely library type as {}\n",
                  ifmt.toString());

        active_.store(false);
        ret = true;
      } // end if active_

      mut_.unlock(); // release the lock
    }                // end try_lock()
    return ret;
  }

  void addSample(LibraryFormat f) {
    if (f.type == type_ and numSamplesNeeded_ >= 0) {
      ++libTypeCounts_[f.formatID()];
      --numSamplesNeeded_;
    }
  }

private:
  // set to false once we have guessed the type
  std::atomic_bool active_{true};
  std::mutex mut_;

  // single or paired-end
  ReadType type_;
  // number of samples needed before we can guess a type
  std::atomic<int64_t> numSamplesNeeded_{50000};

  // the counts for each library type
  std::vector<std::atomic<uint64_t>> libTypeCounts_;
};

#endif //__LIBRARY_TYPE_DETECTOR__