File: OutputSampleListSet.hpp

package info (click to toggle)
pbseqlib 0~20161219-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 5,924 kB
  • ctags: 5,123
  • sloc: cpp: 82,727; makefile: 305; python: 239; sh: 8
file content (124 lines) | stat: -rw-r--r-- 3,372 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#ifndef SIMULATOR_OUTPUT_SAMPLE_LIST_SET_H_
#define SIMULATOR_OUTPUT_SAMPLE_LIST_SET_H_

#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "../../pbdata/utils.hpp"
#include "OutputSampleList.hpp"


// NOTE(review): a using-directive at header scope becomes visible in every
// file that includes this header. The declarations below rely on it for
// their unqualified standard-library names (map, string, vector, ...).
using namespace std;

// Associates a string key with the OutputSampleList stored for that key.
typedef map<string, OutputSampleList> OutputSampleListMap;
class OutputSampleListSet {
 public:
  OutputSampleListMap listMap;
  vector<string> keys;
  int keyLength;
  int nSufficient;
  int sampleSpaceSize;
  int keySize;
  int minSamples;
  int maxSamples;
  vector<int> lengths;
  OutputSampleListSet(int keySizeP) {
    minSamples = 500;
    maxSamples = 2000;
    nSufficient = 0;
    keySize = keySizeP;
    sampleSpaceSize = 1 << (2*keySize);
  }

  void Write(ofstream &out) {
    // Say how many elements to write.
    OutputSampleListMap::iterator mapIt;
    int setSize = listMap.size();
    out.write((char*) &setSize, sizeof(int));
    int keySize = 0;
    // Say how large each element is.
    if (listMap.size() > 0) {
      keySize = listMap.begin()->first.size();
    }
    out.write((char*)&keySize, sizeof(int));

    for (mapIt = listMap.begin(); mapIt != listMap.end(); ++mapIt) {
      string mapItKey = mapIt->first;
      out.write((char*) mapItKey.c_str(), sizeof(char) * mapItKey.size());
      mapIt->second.Write(out);
    }
    int numLengths = lengths.size();
    out.write((char*) &numLengths, sizeof(int));
    int i;
    for ( i = 0; i < lengths.size(); i++) {
      out.write((char*) &lengths[i], sizeof(int));
    }
  }

  void Read(string &inName) {
    ifstream in;
    CrucialOpen(inName, in, std::ios::in|std::ios::binary);
    Read(in);
    in.close();
  }

  void Read(ifstream &in) {
    int setSize;
    in.read((char*) &setSize, sizeof(int));
    in.read((char*) &keyLength, sizeof(int));

    if (keyLength == 0 or setSize == 0) { return; }
    char *key = ProtectedNew<char>(keyLength+1);
    key[keyLength] = '\0';
    int i;
    for (i = 0; i < setSize; i++) {
      in.read(key, sizeof(char)*keyLength);
      listMap[key].Read(in);
    }
    int numLengths;
    in.read((char*) &numLengths, sizeof(int));
    if (numLengths > 0) {
      lengths.resize(numLengths);
    }
    for (i = 0; i < numLengths; i++) {
      in.read((char*) &lengths[i], sizeof(int));
    }
    if (key) {delete [] key; key = NULL;}
  }

  void AppendOutputSample(string key, OutputSample &sample) {
      if (listMap[key].size() < minSamples) {
          if (listMap[key].size() < maxSamples) {
              listMap[key].push_back(sample);
          }
          if (listMap[key].size() == minSamples) {
              nSufficient++;
              cout << nSufficient << " / " << sampleSpaceSize << endl;
          }
      }
  }

  bool Sufficient() {
      return nSufficient == sampleSpaceSize;
  }

  void SampleRandomSample(string key, OutputSample &sample) {
      if (listMap.find(key) == listMap.end()) {
          cout << listMap.size() << endl;
          cout <<"ERROR, " << key << " is not a sampled context." << endl;
          int i;
          for (i = 0; i < key.size(); i++) {
              char c = toupper(key[i]);
              if (c != 'A' and c != 'C' and c != 'G' and c != 'T') {
                  cout << "The nucleotide " << c << " is not supported." << endl;
              }
          }
          exit(1);
      }
      sample = listMap[key][RandomInt(listMap[key].size())];
  }


};

#endif