File: debruijn_data.hpp

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (170 lines) | stat: -rw-r--r-- 4,439 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
//***************************************************************************
//* Copyright (c) 2015 Saint Petersburg State University
//* All Rights Reserved
//* See file LICENSE for details.
//***************************************************************************

#pragma once

#include <vector>
#include <set>
#include <cstring>
#include "utils/verify.hpp"
#include "utils/logger/logger.hpp"
#include "sequence/sequence_tools.hpp"
#include "utils/standard_base.hpp"

namespace debruijn_graph {
class DeBruijnMaster;

// Vertex payload type; DeBruijnDataMaster exposes it as VertexData.
// The class declares no data members, so every instance is interchangeable.
class DeBruijnVertexData {
    // Spelled to match the DeBruijnMaster forward declaration in this namespace.
    friend class DeBruijnMaster;
public:
    // Nothing to initialize: the class holds no state.
    DeBruijnVertexData() {
    }
};

// Unsigned coverage counter that starts at zero and can be overwritten or
// adjusted by a signed delta.
class CoverageData {
 private:
    unsigned coverage_;

 public:
    CoverageData()
            : coverage_(0) {
    }

    // Adds the signed delta `value` to the counter.
    // A negative delta may bring the counter down to exactly zero but not
    // below it; going below zero trips VERIFY.
    void inc_coverage(int value) {
        if (value < 0) {
            // 0u - unsigned(value) yields |value| in unsigned arithmetic, which
            // stays well defined even for INT_MIN (negating the int would not).
            const unsigned decrement = 0u - static_cast<unsigned>(value);
            VERIFY(coverage_ >= decrement);
            coverage_ -= decrement;
        } else {
            coverage_ += static_cast<unsigned>(value);
        }
    }

    // Overwrites the counter with `coverage`.
    void set_coverage(unsigned coverage) {
        coverage_ = coverage;
    }

    //not length normalized
    unsigned coverage() const {
        return coverage_;
    }
};

// Edge payload type; DeBruijnDataMaster exposes it as EdgeData.
// Bundles the edge's Sequence with two independent CoverageData counters:
// the raw coverage and the flanking coverage.
class DeBruijnEdgeData {
    // Spelled to match the DeBruijnMaster forward declaration in this namespace.
    friend class DeBruijnMaster;
    CoverageData coverage_;
    CoverageData flanking_cov_;
    Sequence nucls_;
public:

    // Stores a copy of `nucls`; both coverage counters are default-constructed.
    // NOTE(review): intentionally left non-explicit -- callers may rely on the
    // implicit conversion from Sequence; confirm before tightening.
    DeBruijnEdgeData(const Sequence &nucls) :
            nucls_(nucls) {
    }

    // Read-only access to the stored sequence.
    const Sequence& nucls() const {
        return nucls_;
    }

    // Adjusts the raw coverage counter by the signed delta `value`.
    void inc_raw_coverage(int value) {
        coverage_.inc_coverage(value);
    }

    // Overwrites the raw coverage counter.
    void set_raw_coverage(unsigned coverage) {
        coverage_.set_coverage(coverage);
    }

    // Current value of the raw coverage counter.
    unsigned raw_coverage() const {
        return coverage_.coverage();
    }

    // Adjusts the flanking coverage counter by the signed delta `value`.
    void inc_flanking_coverage(int value) {
        flanking_cov_.inc_coverage(value);
    }

    // Overwrites the flanking coverage counter.
    void set_flanking_coverage(unsigned flanking_coverage) {
        flanking_cov_.set_coverage(flanking_coverage);
    }

    //not length normalized
    unsigned flanking_coverage() const {
        return flanking_cov_.coverage();
    }

    // Size reported by the stored sequence.
    size_t size() const {
        return nucls_.size();
    }
};

class DeBruijnDataMaster {
private:
    const size_t k_;

public:
    typedef DeBruijnVertexData VertexData;
    typedef DeBruijnEdgeData EdgeData;

    DeBruijnDataMaster(size_t k) :
            k_(k) {
    }

    const EdgeData MergeData(const std::vector<const EdgeData*>& to_merge, bool safe_merging = true) const;

    std::pair<VertexData, std::pair<EdgeData, EdgeData>> SplitData(const EdgeData& edge, size_t position, bool is_self_conj = false) const;

    EdgeData GlueData(const EdgeData&, const EdgeData& data2) const;

    bool isSelfConjugate(const EdgeData &data) const {
        return data.nucls() == !(data.nucls());
    }

    EdgeData conjugate(const EdgeData &data) const {
        return EdgeData(!(data.nucls()));
    }

    VertexData conjugate(const VertexData & /*data*/) const {
        return VertexData();
    }

    size_t length(const EdgeData& data) const {
        return data.nucls().size() - k_;
    }

    size_t length(const VertexData& ) const {
        return k_;
    }

    size_t k() const {
        return k_;
    }

};

//typedef DeBruijnVertexData VertexData;
//typedef DeBruijnEdgeData EdgeData;
//typedef DeBruijnDataMaster DataMaster;

// Collects the sequences of all edges in `to_merge` (in order) and builds new
// edge data from MergeOverlappingSequences(sequences, k_, safe_merging).
// Coverage counters of the inputs are not propagated to the result.
inline const DeBruijnEdgeData DeBruijnDataMaster::MergeData(const std::vector<const DeBruijnEdgeData*>& to_merge, bool safe_merging) const {
    std::vector<Sequence> sequences;
    sequences.reserve(to_merge.size());
    for (const DeBruijnEdgeData* part : to_merge) {
        sequences.push_back(part->nucls());
    }
    return EdgeData(MergeOverlappingSequences(sequences, k_, safe_merging));
}

inline std::pair<DeBruijnVertexData, std::pair<DeBruijnEdgeData, DeBruijnEdgeData>> DeBruijnDataMaster::SplitData(const EdgeData& edge,
                                                                                                                  size_t position, 
                                                                                                                  bool is_self_conj) const {
    const Sequence& nucls = edge.nucls();
    size_t end = nucls.size();
    if (is_self_conj) {
        VERIFY(position < end);
        end -= position;
    }
    return std::make_pair(VertexData(), std::make_pair(EdgeData(edge.nucls().Subseq(0, position + k_)), EdgeData(nucls.Subseq(position, end))));
}

// Gluing keeps the second operand: the result is a copy of `data2`, and the
// first argument is ignored.
inline DeBruijnEdgeData DeBruijnDataMaster::GlueData(const DeBruijnEdgeData& /*data1*/, const DeBruijnEdgeData& data2) const {
    return EdgeData(data2);
}

}