File: RAlgorithmsShort.h

package info (click to toggle)
abyss 2.2.5%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 15,640 kB
  • sloc: cpp: 109,409; ansic: 6,510; makefile: 2,194; java: 1,354; sh: 732; perl: 672; haskell: 412; python: 45
file content (152 lines) | stat: -rw-r--r-- 3,503 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#ifndef RRESOLVER_RALGORITHMS_SHORT_H
#define RRESOLVER_RALGORITHMS_SHORT_H 1

#include "RUtils.h"
#include "BloomFilters.h"
#include "Contigs.h"
#include "SequenceTree.h"
#include "Common/Histogram.h"

#include <vector>
#include <string>
#include <cassert>
#include <fstream>
#include <climits>
#include <cmath>

const int    MIN_MARGIN = 2;
const int    MAX_TESTS_OFFSET = 16;
const int    R_VALUES_STEP = 20;
const int    R_STEPS_MAX = 1;
const int    R_MAX_K_DIFF = 80;
const int    MAX_SUBITERATIONS = 2;
const long   HIST_SAMPLE_SIZE = LONG_MAX;
const long   REPEAT_CASES_LIMIT = LONG_MAX;
const int    RMER_QUALITY_THRESHOLD = 30;
const long   READ_STATS_SAMPLE_SIZE = 100000;
const double READ_BATCH_FRACTION_THRESHOLD = 0.30;
const long   PATH_COMBINATIONS_MULTITHREAD_THRESHOLD = 5000;
const double SUPPORTED_PATHS_MIN = 0.15;
const double COV_APPROX_FORMULA_FACTOR = 5.00;
const double SPACED_SEEDS_SNP_FRACTION = 1.00;
const int    MAX_READ_SIZE = 300;

namespace opt {

  /** Read Bloom filter size in bytes. */
  extern size_t bloomSize;

  /** The number of parallel threads */
  extern int threads;

  /** Prefix for the histogram files */
  extern std::string histPrefix;

  /** Name of the file to write resulting graph to */
  extern std::string outputGraphPath;

  /** Name of the file to write resulting graph to */
  extern std::string outputContigsPath;

  /** Number of kmers required to be found for a path to be supported */
  extern int threshold;

  /** Minimum number of sliding window moves */
  extern int min_tests;

  /** Maximum number of branching paths */
  extern int branching;

  /** Flag indicating whether error correction is enabled */
  extern int error_correction;

  /** Name of the file to write supported paths to */
  extern std::string outputSupportedPathsPath;

  /** Name of the file to write unsupported paths to */
  extern std::string outputUnsupportedPathsPath;

  extern unsigned k;  // used by ContigProperties

  extern int format;  // used by ContigProperties

}

class Support {

public:

  Support() = default;

  Support(int calculatedTests): found(-1), tests(-1), calculatedTests(calculatedTests) {
    assert(calculatedTests >= 0);
  }

  Support(int found, int tests):
    found(found), tests(tests)
  {
    assert(found >= 0);
    assert(tests > 0);
    assert(calculatedTests == -1);
  }

  Support(int found, int tests, int calculatedTests):
    found(found), tests(tests), calculatedTests(calculatedTests)
  {
    assert(found >= 0);
    assert(tests > 0);
    assert(calculatedTests >= 0);
  }

  bool unknown() const {
    return tests == -1;
  }

  void reset() {
    found = -1;
    tests = -1;
  }

  bool good() const {
    return unknown() || found >= opt::threshold;
  }

  bool operator>(const Support& s) {
    return found > s.found;
  }

  bool operator<(const Support& s) {
    return found < s.found;
  }

  int found = -1;
  int tests = -1;
  int calculatedTests = -1;

};

class ReadBatch {

public:

  ReadBatch(int size): size(size) {}

  double getFractionOfTotal() const { return double(sampleCount) / double(readsSampleSize); }

  int size;
  std::vector<int> rValues;
  Histogram qualThresholdPositions;
  long sampleCount = 0;

  static long readsSampleSize;
  static std::vector<ReadBatch> batches;
  static ReadBatch current;

};

void
resolveShort(const std::vector<std::string>& readFilepaths,
             ImaginaryContigPaths& supportedPaths,
             ImaginaryContigPaths& unsupportedPaths);

#endif