File: OutputSampleListSet.hpp

package info (click to toggle)
pbseqlib 5.3.5%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,020 kB
  • sloc: cpp: 77,250; python: 331; sh: 103; makefile: 41
file content (128 lines) | stat: -rw-r--r-- 3,895 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#ifndef SIMULATOR_OUTPUT_SAMPLE_LIST_SET_H_
#define SIMULATOR_OUTPUT_SAMPLE_LIST_SET_H_

#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include <alignment/simulator/OutputSampleList.hpp>
#include <pbdata/utils.hpp>

// Associates a key string with the list of output samples stored under that key.
typedef std::map<std::string, OutputSampleList> OutputSampleListMap;
class OutputSampleListSet
{
public:
    OutputSampleListMap listMap;
    std::vector<std::string> keys;
    int keyLength;
    int nSufficient;
    int sampleSpaceSize;
    int keySize;
    int minSamples;
    int maxSamples;
    std::vector<int> lengths;
    OutputSampleListSet(int keySizeP)
    {
        minSamples = 500;
        maxSamples = 2000;
        nSufficient = 0;
        keySize = keySizeP;
        sampleSpaceSize = 1 << (2 * keySize);
    }

    void Write(std::ofstream &out)
    {
        // Say how many elements to write.
        OutputSampleListMap::iterator mapIt;
        int setSize = listMap.size();
        out.write((char *)&setSize, sizeof(int));
        int keySize = 0;
        // Say how large each element is.
        if (listMap.size() > 0) {
            keySize = listMap.begin()->first.size();
        }
        out.write((char *)&keySize, sizeof(int));

        for (mapIt = listMap.begin(); mapIt != listMap.end(); ++mapIt) {
            std::string mapItKey = mapIt->first;
            out.write((char *)mapItKey.c_str(), sizeof(char) * mapItKey.size());
            mapIt->second.Write(out);
        }
        int numLengths = lengths.size();
        out.write((char *)&numLengths, sizeof(int));
        for (size_t i = 0; i < lengths.size(); i++) {
            out.write((char *)&lengths[i], sizeof(int));
        }
    }

    void Read(std::string &inName)
    {
        std::ifstream in;
        CrucialOpen(inName, in, std::ios::in | std::ios::binary);
        Read(in);
        in.close();
    }

    void Read(std::ifstream &in)
    {
        int setSize;
        in.read((char *)&setSize, sizeof(int));
        in.read((char *)&keyLength, sizeof(int));

        if (keyLength == 0 or setSize == 0) {
            return;
        }
        char *key = ProtectedNew<char>(keyLength + 1);
        key[keyLength] = '\0';
        int i;
        for (i = 0; i < setSize; i++) {
            in.read(key, sizeof(char) * keyLength);
            listMap[key].Read(in);
        }
        int numLengths;
        in.read((char *)&numLengths, sizeof(int));
        if (numLengths > 0) {
            lengths.resize(numLengths);
        }
        for (i = 0; i < numLengths; i++) {
            in.read((char *)&lengths[i], sizeof(int));
        }
        if (key) {
            delete[] key;
            key = NULL;
        }
    }

    void AppendOutputSample(std::string key, OutputSample &sample)
    {
        if (static_cast<int>(listMap[key].size()) < minSamples) {
            if (static_cast<int>(listMap[key].size()) < maxSamples) {
                listMap[key].push_back(sample);
            }
            if (static_cast<int>(listMap[key].size()) == minSamples) {
                nSufficient++;
                std::cout << nSufficient << " / " << sampleSpaceSize << std::endl;
            }
        }
    }

    bool Sufficient() { return nSufficient == sampleSpaceSize; }

    void SampleRandomSample(std::string key, OutputSample &sample)
    {
        if (listMap.find(key) == listMap.end()) {
            std::cout << listMap.size() << std::endl;
            std::cout << "ERROR, " << key << " is not a sampled context." << std::endl;
            for (size_t i = 0; i < key.size(); i++) {
                char c = toupper(key[i]);
                if (c != 'A' and c != 'C' and c != 'G' and c != 'T') {
                    std::cout << "The nucleotide " << c << " is not supported." << std::endl;
                }
            }
            std::exit(EXIT_FAILURE);
        }
        sample = listMap[key][RandomInt(listMap[key].size())];
    }
};

#endif