File: TupleMatchingImpl.hpp

package info (click to toggle)
pbseqlib 5.3.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (104 lines) | stat: -rw-r--r-- 3,418 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

#include <pbdata/Types.h>
#include <pbdata/DNASequence.hpp>
#include <pbdata/NucConversion.hpp>
#include <pbdata/SeqUtils.hpp>

//
// Walks every start position of seq and appends the tuple built for that
// position to tupleList, recording the start position in the tuple's pos.
//
//   seq        sequence to read; accessed through size(), length and seq[].
//   tm         tuple metrics; tm.tupleSize is the window width used here.
//   tupleList  destination; entries are added with Append().
//
// Returns 1 in every case, including when seq is shorter than one tuple
// (in which case tupleList is left untouched).
//
template <typename Sequence, typename T_TupleList>
int SequenceToTupleList(Sequence &seq, TupleMetrics &tm, T_TupleList &tupleList)
{
    typename T_TupleList::Tuple window;

    // Too short to hold even a single tuple: nothing to add.
    if (seq.size() < tm.tupleSize) {
        return 1;
    }

    // NOTE(review): a default-constructed tuple is appended before any real
    // tuple has been built -- confirm callers expect this leading entry.
    tupleList.Append(window);

    // Non-zero while `window` holds a tuple that can be extended by shifting
    // in one more character; zero forces a rebuild from the current position.
    int valid = 0;
    for (int start = 0; start < seq.length - tm.tupleSize + 1; start++) {
        // Cheap path: extend the previous window by its new last character.
        if (valid) {
            valid = window.ShiftAddRL(seq.seq[start + tm.tupleSize - 1], tm);
        }
        // Fallback (also taken when the shift just failed): rebuild the
        // whole window starting at `start`.
        if (!valid) {
            valid = window.FromStringRL(&seq.seq[start], tm);
        }
        if (!valid) {
            continue;
        }

        // NOTE(review): the same character is shifted in a second time here
        // and the result is ignored; StoreMatchingPositions has no such
        // call -- verify this is intended.
        window.ShiftAddRL(seq.seq[start + tm.tupleSize - 1], tm);
        window.pos = start;
        tupleList.Append(window);
    }
    return 1;
}

template <typename TSequence, typename TMatch, typename T_TupleList>
int StoreMatchingPositions(TSequence &querySeq, TupleMetrics &tm, T_TupleList &targetTupleList,
                           std::vector<TMatch> &matchSet)
{
    DNALength s;
    //	TQueryTuple queryTuple;
    typename T_TupleList::Tuple queryTuple;
    queryTuple.pos = 0;
    if (querySeq.length >= static_cast<DNALength>(tm.tupleSize)) {
        int res = 0;
        for (s = 0; s < querySeq.length - tm.tupleSize + 1; s++) {
            if ((res and (res = queryTuple.ShiftAddRL(querySeq.seq[s + tm.tupleSize - 1], tm))) or
                (!res and (res = queryTuple.FromStringRL(&querySeq.seq[s], tm)))) {
                int targetListIndex = 0;
                typename std::vector<typename T_TupleList::Tuple>::const_iterator curIt, endIt;
                targetTupleList.FindAll(queryTuple, curIt, endIt);

                for (; curIt != endIt; curIt++) {
                    matchSet.push_back(TMatch(s, (*curIt).pos));
                    ++targetListIndex;
                }
            }
        }
    }
    return matchSet.size();
}

//
// Appends to uniqueTuplePosList the start positions of the tuples that occur
// exactly once in seq, then sorts the whole list in ascending order.
//
//   seq                 sequence to scan; accessed through length and seq[].
//   tm                  tuple metrics; tm.tupleSize is the window width.
//   uniqueTuplePosList  output; positions are appended (existing entries are
//                       kept) and the list is sorted before returning.
//
// Returns uniqueTuplePosList.size() after the update (converted to int).
//
// Tuple must provide operator< (used to sort) and operator== (used to detect
// repeats); tuples that compare equal are assumed to end up adjacent after
// sorting.
//
template <typename Sequence, typename Tuple>
int StoreUniqueTuplePosList(Sequence seq, TupleMetrics &tm, std::vector<int> &uniqueTuplePosList)
{
    //
    // Do this faster later on with a suffix tree -- faster than n log n construction time.
    //
    typedef std::vector<std::pair<Tuple, int> > TuplePosList;
    typedef typename TuplePosList::size_type Index;

    TuplePosList tuples;

    //
    // Only scan when the sequence holds at least one full tuple.  Without
    // this check an unsigned length makes `length - tupleSize + 1` wrap
    // around and the loop reads far past the end of the sequence.
    //
    if (!(seq.length < tm.tupleSize)) {
        tuples.reserve(seq.length - tm.tupleSize + 1);
        Tuple tempTuple;
        for (int s = 0; s < seq.length - tm.tupleSize + 1; s++) {
            // NOTE(review): the result of FromStringRL is ignored here, while
            // the sibling functions treat a zero result as "no tuple at this
            // position" -- confirm whether such positions should be skipped.
            tempTuple.FromStringRL(&(seq.seq[s]), tm);
            tuples.push_back(std::make_pair(tempTuple, s));
        }
    }
    std::sort(tuples.begin(), tuples.end());

    //
    // Filter out the repetitive tuples.
    //
    // Walk the sorted list one run of equal tuples at a time.  Only the
    // tuple part is compared: the stored positions are all distinct, so
    // comparing whole (tuple, position) pairs would never detect a repeat.
    //
    const Index numTuples = tuples.size();
    Index runStart = 0;
    while (runStart < numTuples) {
        Index runEnd = runStart + 1;
        while (runEnd < numTuples and tuples[runEnd].first == tuples[runStart].first) {
            ++runEnd;
        }
        if (runEnd - runStart == 1) {
            // A run of length one is a tuple that occurs exactly once.
            uniqueTuplePosList.push_back(tuples[runStart].second);
        }
        runStart = runEnd;
    }

    //
    // Be nice and leave the pos list in ascending sorted order,
    // even though the top of this function does not specify it.
    //
    std::sort(uniqueTuplePosList.begin(), uniqueTuplePosList.end());
    return static_cast<int>(uniqueTuplePosList.size());
}