File: FragmentStartPositionDistribution.hpp

package info (click to toggle)
salmon 0.7.2%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 4,352 kB
  • ctags: 5,243
  • sloc: cpp: 42,341; ansic: 6,252; python: 228; makefile: 207; sh: 190
file content (143 lines) | stat: -rw-r--r-- 4,960 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/**
 * Model the bias in read start positions across a set of transcripts.
 * This class is similar to and inspired by the FragmentLengthDistribution
 * class, which was itself modified from lengthdistribution.h ---
 * originally written by Adam Roberts as part of the eXpress software.
 * updated by Rob Patro; 2014, 2015
 */

#ifndef FRAGMENT_START_POSITION_DISTRIBUTION
#define FRAGMENT_START_POSITION_DISTRIBUTION

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <string>
#include <vector>

#include "tbb/atomic.h"

/**
 * The FragmentStartPositionDistribution class keeps track of the observed
 * fragment start position distribution. It is initialized with a uniform
 * prior whose parameters are specified by the arguments to the constructor.
 * All mass values and probabilities are stored and returned in log space
 * (except in toString).
 */
class FragmentStartPositionDistribution {
  /**
   * A private vector that stores the observed (logged) mass.
   * NOTE(review): presumably one entry per position bin (see numBins_);
   * confirm against the implementation.
   */
  std::vector<tbb::atomic<double>> pmf_;
  /**
   * A private vector holding the cumulative counterpart of pmf_.
   * NOTE(review): presumably this is the CDF that update() computes;
   * confirm against the implementation.
   */
  std::vector<tbb::atomic<double>> cmf_;
  /**
   * A private double that stores the total observed (logged) mass.
   */
  tbb::atomic<double> totMass_;
  /**
   * A private double that stores a (logged) running sum intended for quick
   * mean calculations.
   * NOTE(review): the earlier description ("product of observed lengths and
   * masses") appears to have been carried over from the length-distribution
   * class; confirm what is actually accumulated here.
   */
  tbb::atomic<double> sum_;
  /**
   * The number of bins we consider within each transcript.
   */
  std::size_t numBins_;

  // Mutex for this distribution.
  std::mutex fspdMut_;
  // Flag set by update() (see the comment on update() below).
  std::atomic<bool> isUpdated_;
  // NOTE(review): presumably gates whether new observations are accepted;
  // confirm against the implementation.
  std::atomic<bool> allowUpdates_;
  // NOTE(review): presumably tracks in-flight updates; confirm against the
  // implementation.
  std::atomic<std::uint32_t> performingUpdate_;

public:
  /**
   * FragmentStartPositionDistribution constructor:
   * @param numBins The number of bins to consider for
   *                each transcript (defaults to 20).
   */
  FragmentStartPositionDistribution(std::uint32_t numBins = 20);

  /**
   * A member function that updates the distribution based on a new
   * fragment start position observation.
   * @param hitPos The position where the observed fragment begins.
   * @param txpLen The length of the transcript the fragment was observed on.
   * @param mass a double for the mass (logged) to add.
   */
  void addVal(std::int32_t hitPos, std::uint32_t txpLen, double mass);

  /**
   * A member function that returns the probability that a hit
   * starts at the specified position within the given transcript length.
   * @param hitPos The position where the fragment begins
   * @param txpLen The length of the transcript
   * @param effLen The effective length of the transcript.
   *        NOTE(review): whether this is expected in log space is not
   *        visible from this header (compare logEffLen in
   *        logNumDenomMass); confirm against the implementation.
   */
  double operator()(std::int32_t hitPos, std::uint32_t txpLen, double effLen);

  /**
   * A member function that computes the probability that a hit
   * starts at the specified position within the given transcript length.
   * The overall log probability is given by logNum - logDenom. The function
   * returns true if the probability is non-zero and false otherwise.
   * @param hitPos The position where the fragment begins
   * @param txpLen The length of the transcript
   * @param logEffLen the log of the effective length of the transcript
   * @param logNum output: the log of the numerator
   * @param logDenom output: the log of the denominator
   * @return true if the probability is non-zero, false otherwise.
   */
  bool logNumDenomMass(std::int32_t hitPos,
                       std::uint32_t txpLen,
                       double logEffLen,
                       double& logNum,
                       double& logDenom);

  // Evaluate the CDF between two points.
  // NOTE(review): the two points are presumably derived from hitPos and
  // txpLen; confirm against the implementation.
  double evalCDF(std::int32_t hitPos, std::uint32_t txpLen);

  // Update the distribution (compute the CDF) and
  // set isUpdated_.
  void update();

  // The three accessors below are disabled (commented out); their
  // descriptions speak of "lengths" and appear to have been carried over
  // from the length-distribution class.

  /**
   * An accessor for the (logged) probability of a given length.
   * @param len an integer for the length to return the probability of.
   * @return (logged) probability of observing the given length.
   */
  //double pmf(size_t len) const;
  /**
   * A member function that returns a (logged) cumulative mass for a given
   * length.
   * @param len an integer for the length to return the cmf value of.
   * @return (Logged) cmf value of length.
   */
  //double cmf(size_t len) const;
  /**
   * A member function that returns a vector containing the (logged) cumulative
   * mass function *for the bins*.
   * @return (Logged) cmf of bins.
   */
  //std::vector<double> cmf() const;

  /**
   * An accessor for the (logged) observation mass (including pseudo-counts).
   * @return Total observation mass.
   */
  double totMass() const;

  /**
   * A member function that returns a string containing the current
   * distribution.
   * @return String of (non-logged) probabilities.
   *         NOTE(review): the earlier text described a space-separated list
   *         "ordered from length 0 to max_val"; the exact layout for this
   *         binned distribution should be confirmed against the
   *         implementation.
   */
  std::string toString() const;
  //std::string to_string() const;

  /**
   * (Disabled.) A member function that appends the LengthDistribution
   * parameters to the end of the given file.
   * @param outfile the file to append to.
   * @param length_type a string specifying the type of length the distribution
   *        is of (ie. "Fragment" or "Target") to be included in the header.
   */
  //void append_output(std::ofstream& outfile, std::string length_type) const;
};

#endif // FRAGMENT_START_POSITION_DISTRIBUTION