File: FragmentCCSIterator.cpp

package info (click to toggle)
pbseqlib 5.3.5%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,020 kB
  • sloc: cpp: 77,250; python: 331; sh: 103; makefile: 41
file content (108 lines) | stat: -rw-r--r-- 4,147 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#include <alignment/files/FragmentCCSIterator.hpp>

void FragmentCCSIterator::Initialize(CCSSequence *_seqPtr, RegionTable *_regionTablePtr)
{
    seqPtr = _seqPtr;
    regionTablePtr = _regionTablePtr;
    curPass = 0;
    numPasses = 0;
    subreadIntervals.clear();
    readIntervalDirection.clear();

    int hqRegionStart, hqRegionEnd, hqRegionScore;
    hqRegionStart = hqRegionEnd = hqRegionScore = 0;

    bool hasHQRegion = LookupHQRegion(seqPtr->zmwData.holeNumber, *regionTablePtr, hqRegionStart,
                                      hqRegionEnd, hqRegionScore);

    if (not hasHQRegion) {
        return;  // Don't bother if there is no HQ region.
    }

    //
    // Since this iterator covers all passes, and not just those
    // included in the ccs, the the regions need to be loaded.
    subreadIntervals =
        (*regionTablePtr)[seqPtr->HoleNumber()].SubreadIntervals(seqPtr->unrolledRead.length, true);
    if (subreadIntervals.size() == 0) {
        return;
    }

    readIntervalDirection.resize(subreadIntervals.size());
    std::fill(readIntervalDirection.begin(), readIntervalDirection.end(), 2);

    //
    // Assign the read interval directions based on the pass direction
    // for the pass that has a similar start position.  This allows
    // some wiggle although in practice they coordinates of the pass
    // start base and the template should always match up.
    //
    int i, j;
    for (i = 0; i < static_cast<int>(subreadIntervals.size()); i++) {
        for (j = 0; j < static_cast<int>(seqPtr->passStartBase.size()); j++) {
            if (std::abs(subreadIntervals[i].start - static_cast<int>(seqPtr->passStartBase[j])) <
                10) {
                readIntervalDirection[i] = seqPtr->passDirection[j];
                break;
            }
        }
    }

    int firstAssignedSubread = 0;
    while (firstAssignedSubread < static_cast<int>(subreadIntervals.size()) and
           readIntervalDirection[firstAssignedSubread] == 2) {
        firstAssignedSubread++;
    }

    if (firstAssignedSubread == static_cast<int>(subreadIntervals.size())) {
        // None of the subread has been assigned a direction, guess.
        firstAssignedSubread = 0;
        readIntervalDirection[0] = 0;
    }

    // Assign directions to intervals to the left of the first assigned.
    if (firstAssignedSubread < static_cast<int>(subreadIntervals.size()) and
        subreadIntervals.size() > 0) {
        int curSubreadDir = readIntervalDirection[firstAssignedSubread];
        assert(curSubreadDir == 0 or curSubreadDir == 1);
        for (i = firstAssignedSubread - 1; i >= 0; i--) {
            curSubreadDir = (curSubreadDir == 0) ? 1 : 0;
            readIntervalDirection[i] = curSubreadDir;
        }
    }

    // Assign directions to intervals which are to the right of the first
    // assigned and whose direction is unknown.
    for (i = firstAssignedSubread + 1; i < static_cast<int>(subreadIntervals.size()); i++) {
        int &di = readIntervalDirection[i];
        int dp = readIntervalDirection[i - 1];
        if (di != 0 and di != 1) {
            di = (dp == 0) ? 1 : 0;
        }
    }

    //
    // So far, subreadIntervals have been sorted and each assigned
    // a passDirection. But since all or part of a subreadInterval
    // may not be in the HQ region, we need to trim low quality regions
    // from subreads, remove subreads which do not have any high quality
    // regions from subreadIntervals and their corresponding pass directions
    // from readIntervalDirection.
    //
    GetHighQualitySubreadsIntervals(subreadIntervals, readIntervalDirection, hqRegionStart,
                                    hqRegionEnd);
    // Update number of passes.
    numPasses = subreadIntervals.size();
}

int FragmentCCSIterator::GetNext(int &direction, int &startBase, int &numBases)
{
    if (curPass >= int(subreadIntervals.size())) {
        return 0;
    }
    direction = readIntervalDirection[curPass];
    startBase = subreadIntervals[curPass].start;
    numBases = subreadIntervals[curPass].end - subreadIntervals[curPass].start;
    ++curPass;
    return 1;
}