File: LengthHistogram.cpp

package info (click to toggle)
pbseqlib 0~20161219-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 5,924 kB
  • ctags: 5,123
  • sloc: cpp: 82,727; makefile: 305; python: 239; sh: 8
file content (56 lines) | stat: -rw-r--r-- 1,672 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include "LengthHistogram.hpp"

int LengthHistogram::Read(std::string &inName) {
    std::ifstream in;
    CrucialOpen(inName, in, std::ios::in);
    return Read(in);
}

int LengthHistogram::Read(std::ifstream &in) {
    while(in) {
        int length, count;
        in >> length;
        in >> count;
        lengthHistogram.data.push_back(length);
        if (lengthHistogram.cdf.size() == 0) {
            lengthHistogram.cdf.push_back(count);
        }
        else {
            lengthHistogram.cdf.push_back(lengthHistogram.cdf[lengthHistogram.cdf.size()-1] + count);
        }
    }
    return 1;
}

void LengthHistogram::GetRandomLength(int &length) {
    lengthHistogram.SelectRandomValue(length);
}

void LengthHistogram::BuildFromAlignmentLengths(std::vector<int> &lengths) {
    sort(lengths.begin(), lengths.end());
    for (size_t f = 0, i = 1; i < lengths.size(); i++) {
        if (lengths[i] != lengths[f]) {
            lengthHistogram.data.push_back(lengths[f]);
            lengthHistogram.cdf.push_back(i);
            f = i;
        }
    }
    if (lengths.size() != 0) {
        lengthHistogram.data.push_back(lengths[lengths.size()-1]);
        lengthHistogram.cdf.push_back(lengths.size());
    }
    /* Tests:
     * indices                0 1 2 3 4  5  6
     * lengths:               1 3 5 9 10 10 11
     * lengthHistogram.data:  1 3 5 9 10 11
     * lengthHistogram.cdf :  1 2 3 4 6  7
     *
     * lengths:               1 3 5 9 10 11
     * lengthHistogram.data:  1 3 5 9 10 11
     * lengthHistogram.cdf :  1 2 3 4 5  6
     *
     * lengths:               10
     * lengthHistogram.data:  10
     * lengthHistogram.cdf :  1 
     */ 
}