File: SBModel.hpp

package info (click to toggle)
salmon 0.7.2%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 4,352 kB
  • ctags: 5,243
  • sloc: cpp: 42,341; ansic: 6,252; python: 228; makefile: 207; sh: 190
file content (90 lines) | stat: -rw-r--r-- 2,222 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#ifndef __SB_MODEL_HPP__
#define __SB_MODEL_HPP__

#include <boost/iostreams/filtering_stream.hpp>

#include "jellyfish/mer_dna.hpp"
#include "UtilityFunctions.hpp"
#include <Eigen/Dense>
#include <cmath>

using Mer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 4>;

class SBModel {
public:
  SBModel();   
  
  SBModel(const SBModel&) = default;
  SBModel(SBModel&&) = default;
  SBModel& operator=(const SBModel&) = default;
  SBModel& operator=(SBModel&&) = default;

  bool writeBinary(boost::iostreams::filtering_ostream& out) const; 

  inline int32_t contextBefore(bool rc) { return rc ? _contextRight : _contextLeft; }
  inline int32_t contextAfter(bool rc) { return rc ? _contextLeft : _contextRight; }

    bool addSequence(const char* seqIn, bool revCmp, double weight = 1.0);
    bool addSequence(const Mer& mer, double weight); 

    Eigen::MatrixXd& counts();
    Eigen::MatrixXd& marginals();
  
    double evaluateLog(const char* seqIn); 
    double evaluateLog(const Mer& mer);
 
  bool normalize();

  bool checkTransitionProbabilities();
  
  void combineCounts(const SBModel& other);

  void dumpConditionalProbabilities(std::ostream& os);

  int32_t getContextLength(); 

  template <typename CountVecT>
  bool train(CountVecT& kmerCounts, const uint32_t K);
  
  inline double evaluate(uint32_t kmer, uint32_t K) {
    std::vector<uint32_t> _order{0, 0, 2,2,2,2};
    double p{1.0};
    for (int32_t pos = 0; pos < K - _order.back(); ++pos) {
      uint32_t offset = 2 * (K - (pos + 1) - _order[pos]);
      auto idx = _getIndex(kmer, offset, _order[pos]);
      p *= _probs(idx, pos);
    }
    return p;
  }

private:
  inline uint32_t _getIndex(uint32_t kmer, uint32_t offset, uint32_t _order) {
    kmer >>= offset;
    switch (_order) {
    case 0:
      return kmer & 0x3;
    case 1:
      return kmer & 0xF;
    case 2:
      return kmer & 0x3F;
    default:
      return 0;
    }
    return 0;
  }
  bool _trained;

  int32_t _contextLength;
  int32_t _contextLeft;
  int32_t _contextRight;

  Eigen::MatrixXd _probs;
  Eigen::MatrixXd _marginals;

  Mer _mer;
  std::vector<int32_t> _order;
  std::vector<int32_t> _shifts;
  std::vector<int32_t> _widths;
};

#endif //__SB_MODEL_HPP__