File: BWTSearch.cpp

package info (click to toggle)
pbseqlib 5.3.1%2Bdfsg-2.1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 7,136 kB
  • sloc: cpp: 77,246; python: 570; makefile: 312; sh: 111; ansic: 9
file content (76 lines) | stat: -rw-r--r-- 2,745 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include <alignment/algorithms/anchoring/BWTSearch.hpp>

int MapReadToGenome(BWT &bwt, FASTASequence &seq, DNALength subreadStart, DNALength subreadEnd,
                    std::vector<ChainedMatchPos> &matchPosList, AnchorParameters &params,
                    int &numBasesAnchored, std::vector<DNALength> &spv, std::vector<DNALength> &epv)
{

    FASTASequence prefix;
    numBasesAnchored = 0;
    if (subreadEnd - subreadStart < params.minMatchLength) {
        return 0;
    } else {
        DNALength p;
        prefix.seq = seq.seq;
        for (p = subreadStart + params.minMatchLength; p < subreadEnd; p++) {
            //
            // Try reusing the vectors between calls - not thread
            // safe replace function call with one that has access
            // to a buffer class.
            //
            spv.clear();
            epv.clear();
            prefix.length = p;
            bwt.Count(prefix, spv, epv);

            DNALength matchLength = spv.size();
            //
            // Keep going without subtracting from zero if there
            // are no hits.
            //
            if (spv.size() == 0) {
                continue;
            }

            DNALength i;
            std::vector<DNALength> matches;
            while (matchLength >= params.minMatchLength) {
                i = matchLength - 1;

                if (matchLength > 0 and epv[i] >= spv[i]) {
                    //
                    // Add the positions of the matches here.
                    //
                    matches.clear();
                    if (epv[i] - spv[i] + 1 < params.maxAnchorsPerPosition) {
                        numBasesAnchored++;
                        bwt.Locate(spv[i], epv[i], matches);
                    }
                    break;
                }
                matchLength--;
            }

            // Convert from genome positions to tuples
            DNALength m;
            for (m = 0; m < matches.size(); m++) {
                // This if statement is a workaround for a bug
                // that is allowing short matches
                if (matches[m] >= matchLength) {
                    matchPosList.push_back(ChainedMatchPos(
                        matches[m] - matchLength, p - matchLength, matchLength, matches.size()));
                }
            }
        }
    }
    return matchPosList.size();
}

int MapReadToGenome(BWT &bwt, FASTASequence &seq, DNALength start, DNALength end,
                    std::vector<ChainedMatchPos> &matchPosList, AnchorParameters &params,
                    int &numBasesAnchored)
{
    std::vector<DNALength> spv, epv;

    return MapReadToGenome(bwt, seq, start, end, matchPosList, params, numBasesAnchored, spv, epv);
}